diff options
author | YamaArashi <shadow962@live.com> | 2016-04-22 18:25:40 -0700 |
---|---|---|
committer | YamaArashi <shadow962@live.com> | 2016-04-22 18:25:40 -0700 |
commit | 0df6676bfccb1d68aa741997702d2a558110d322 (patch) | |
tree | 440a7906ea6e75e17079ff1aed44900942b1e85a | |
parent | 0601fb4d0013bd0f2508364b313acbdc371da674 (diff) |
use preprocessor
-rw-r--r-- | Makefile | 45 | ||||
-rw-r--r-- | asm/charmap.s | 280 | ||||
-rw-r--r-- | charmap.txt | 280 | ||||
-rw-r--r-- | data/data1.s | 1 | ||||
-rw-r--r-- | data/data2.s | 1 | ||||
-rw-r--r-- | fix_local_labels.pl | 28 | ||||
-rw-r--r-- | tools/preproc/.gitignore | 1 | ||||
-rw-r--r-- | tools/preproc/LICENSE | 19 | ||||
-rw-r--r-- | tools/preproc/Makefile | 15 | ||||
-rw-r--r-- | tools/preproc/asm_file.cpp | 563 | ||||
-rw-r--r-- | tools/preproc/asm_file.h | 71 | ||||
-rw-r--r-- | tools/preproc/char_util.h | 65 | ||||
-rw-r--r-- | tools/preproc/charmap.cpp | 397 | ||||
-rw-r--r-- | tools/preproc/charmap.h | 64 | ||||
-rw-r--r-- | tools/preproc/preproc.cpp | 83 | ||||
-rw-r--r-- | tools/preproc/preproc.h | 52 | ||||
-rw-r--r-- | tools/preproc/utf8.cpp | 94 | ||||
-rw-r--r-- | tools/preproc/utf8.h | 34 |
18 files changed, 1766 insertions, 327 deletions
@@ -10,6 +10,8 @@ GFX := @tools/gbagfx/gbagfx SCANINC := tools/scaninc/scaninc +PREPROC := tools/preproc/preproc + # Clear the default suffixes. .SUFFIXES: @@ -20,19 +22,22 @@ SCANINC := tools/scaninc/scaninc .PHONY: rom tools gbagfx scaninc clean compare deps -CSRCS := $(wildcard src/*.c) -OBJS := asm/crt0.o asm/rom1.o asm/rom2.o asm/rom3.o asm/rom4.o asm/rom5.o \ - asm/libgcnmultiboot.o asm/libmks4agb.o asm/libagbsyscall.o asm/libgcc.o \ - src/string_util.o src/rtc.o src/play_time.o src/task.o \ - src/agb_flash.o src/agb_flash_1m.o src/agb_flash_mx.o src/siirtc.o \ - data/data1.o data/data2.o +C_SRCS := $(wildcard src/*.c) +C_OBJS := $(C_SRCS:%.c=%.o) + +ASM_OBJS := asm/crt0.o asm/rom1.o asm/rom2.o asm/rom3.o asm/rom4.o asm/rom5.o \ + asm/libgcnmultiboot.o asm/libmks4agb.o asm/libagbsyscall.o asm/libgcc.o + +DATA_ASM_OBJS := data/data1.o data/data2.o + +OBJS := $(C_OBJS) $(ASM_OBJS) $(DATA_ASM_OBJS) ROM := pokeruby.gba ELF := $(ROM:.gba=.elf) rom: $(ROM) -tools: gbagfx scaninc +tools: gbagfx scaninc preproc gbagfx: cd tools/gbagfx && make @@ -40,6 +45,9 @@ gbagfx: scaninc: cd tools/scaninc && make +preproc: + cd tools/preproc && make + # For contributors to make sure a change didn't affect the contents of the ROM. 
compare: $(ROM) @$(SHA1) rom.sha1 @@ -59,22 +67,25 @@ include tilesets.mk %.gbapal: %.pal ; $(GFX) $< $@ %.lz: % ; $(GFX) $< $@ -$(OBJS): $(CSRCS:src/%.c=src/%.s) +src/siirtc.o: CFLAGS := -mthumb-interwork -Iinclude -src/siirtc.s: CFLAGS := -mthumb-interwork -Iinclude +src/agb_flash.o: CFLAGS := -O -mthumb-interwork -Iinclude +src/agb_flash_1m.o: CFLAGS := -O -mthumb-interwork -Iinclude +src/agb_flash_mx.o: CFLAGS := -O -mthumb-interwork -Iinclude -src/agb_flash.s: CFLAGS := -O -mthumb-interwork -Iinclude -src/agb_flash_1m.s: CFLAGS := -O -mthumb-interwork -Iinclude -src/agb_flash_mx.s: CFLAGS := -O -mthumb-interwork -Iinclude +$(C_OBJS): %.o : %.c + $(CC) $(CFLAGS) -o $*.s $< -S + echo -e ".text\n\t.align\t2, 0\n" >> $*.s + $(AS) $(ASFLAGS) -o $@ $*.s -src/%.s: src/%.c - $(CC) $(CFLAGS) -o $@ $< -S - echo -e ".text\n\t.align\t2, 0\n" >> $@ +%.o : dep = $(shell $(SCANINC) $*.s) -%.o: dep = $(shell $(SCANINC) $(@D)/$*.s) -%.o: %.s $$(dep) +$(ASM_OBJS): %.o: %.s $$(dep) $(AS) $(ASFLAGS) -o $@ $< +$(DATA_ASM_OBJS): %.o: %.s $$(dep) + $(PREPROC) $< charmap.txt | $(AS) $(ASFLAGS) -o $@ + # Link objects to produce the ROM. 
$(ROM): $(OBJS) ./pokeld -T ld_script.txt -T iwram_syms.txt -T ewram_syms.txt -o $(ELF) $(OBJS) diff --git a/asm/charmap.s b/asm/charmap.s deleted file mode 100644 index 4b0e19917..000000000 --- a/asm/charmap.s +++ /dev/null @@ -1,280 +0,0 @@ - .charmap " ", 0x00 - .charmap "é", 0x1B - .charmap "&", 0x2D - .charmap "%", 0x5B - .charmap "(", 0x5C - .charmap ")", 0x5D - .charmap "0", 0xA1 - .charmap "1", 0xA2 - .charmap "2", 0xA3 - .charmap "3", 0xA4 - .charmap "4", 0xA5 - .charmap "5", 0xA6 - .charmap "6", 0xA7 - .charmap "7", 0xA8 - .charmap "8", 0xA9 - .charmap "9", 0xAA - .charmap "!", 0xAB - .charmap "?", 0xAC - .charmap ".", 0xAD - .charmap "-", 0xAE - .charmap "·", 0xAF - .charmap "…", 0xB0 - .charmap "“", 0xB1 - .charmap "”", 0xB2 - .charmap "‘", 0xB3 - .charmap "’", 0xB4 - .charmap "♂", 0xB5 - .charmap "♀", 0xB6 - .charmap "¥", 0xB7 - .charmap ",", 0xB8 - .charmap "×", 0xB9 - .charmap "/", 0xBA - .charmap "A", 0xBB - .charmap "B", 0xBC - .charmap "C", 0xBD - .charmap "D", 0xBE - .charmap "E", 0xBF - .charmap "F", 0xC0 - .charmap "G", 0xC1 - .charmap "H", 0xC2 - .charmap "I", 0xC3 - .charmap "J", 0xC4 - .charmap "K", 0xC5 - .charmap "L", 0xC6 - .charmap "M", 0xC7 - .charmap "N", 0xC8 - .charmap "O", 0xC9 - .charmap "P", 0xCA - .charmap "Q", 0xCB - .charmap "R", 0xCC - .charmap "S", 0xCD - .charmap "T", 0xCE - .charmap "U", 0xCF - .charmap "V", 0xD0 - .charmap "W", 0xD1 - .charmap "X", 0xD2 - .charmap "Y", 0xD3 - .charmap "Z", 0xD4 - .charmap "a", 0xD5 - .charmap "b", 0xD6 - .charmap "c", 0xD7 - .charmap "d", 0xD8 - .charmap "e", 0xD9 - .charmap "f", 0xDA - .charmap "g", 0xDB - .charmap "h", 0xDC - .charmap "i", 0xDD - .charmap "j", 0xDE - .charmap "k", 0xDF - .charmap "l", 0xE0 - .charmap "m", 0xE1 - .charmap "n", 0xE2 - .charmap "o", 0xE3 - .charmap "p", 0xE4 - .charmap "q", 0xE5 - .charmap "r", 0xE6 - .charmap "s", 0xE7 - .charmap "t", 0xE8 - .charmap "u", 0xE9 - .charmap "v", 0xEA - .charmap "w", 0xEB - .charmap "x", 0xEC - .charmap "y", 0xED - .charmap 
"z", 0xEE - .charmap ":", 0xF0 - .charmap "$", 0xFF - -@ Hiragana - .charmap "あ", 0x01 - .charmap "い", 0x02 - .charmap "う", 0x03 - .charmap "え", 0x04 - .charmap "お", 0x05 - .charmap "か", 0x06 - .charmap "き", 0x07 - .charmap "く", 0x08 - .charmap "け", 0x09 - .charmap "こ", 0x0A - .charmap "さ", 0x0B - .charmap "し", 0x0C - .charmap "す", 0x0D - .charmap "せ", 0x0E - .charmap "そ", 0x0F - .charmap "た", 0x10 - .charmap "ち", 0x11 - .charmap "つ", 0x12 - .charmap "て", 0x13 - .charmap "と", 0x14 - .charmap "な", 0x15 - .charmap "に", 0x16 - .charmap "ぬ", 0x17 - .charmap "ね", 0x18 - .charmap "の", 0x19 - .charmap "は", 0x1A - .charmap "ひ", 0x1B - .charmap "ふ", 0x1C - .charmap "へ", 0x1D - .charmap "ほ", 0x1E - .charmap "ま", 0x1F - .charmap "み", 0x20 - .charmap "む", 0x21 - .charmap "め", 0x22 - .charmap "も", 0x23 - .charmap "や", 0x24 - .charmap "ゆ", 0x25 - .charmap "よ", 0x26 - .charmap "ら", 0x27 - .charmap "り", 0x28 - .charmap "る", 0x29 - .charmap "れ", 0x2A - .charmap "ろ", 0x2B - .charmap "わ", 0x2C - .charmap "を", 0x2D - .charmap "ん", 0x2E - .charmap "ぁ", 0x2F - .charmap "ぃ", 0x30 - .charmap "ぅ", 0x31 - .charmap "ぇ", 0x32 - .charmap "ぉ", 0x33 - .charmap "ゃ", 0x34 - .charmap "ゅ", 0x35 - .charmap "ょ", 0x36 - .charmap "が", 0x37 - .charmap "ぎ", 0x38 - .charmap "ぐ", 0x39 - .charmap "げ", 0x3A - .charmap "ご", 0x3B - .charmap "ざ", 0x3C - .charmap "じ", 0x3D - .charmap "ず", 0x3E - .charmap "ぜ", 0x3F - .charmap "ぞ", 0x40 - .charmap "だ", 0x41 - .charmap "ぢ", 0x42 - .charmap "づ", 0x43 - .charmap "で", 0x44 - .charmap "ど", 0x45 - .charmap "ば", 0x46 - .charmap "び", 0x47 - .charmap "ぶ", 0x48 - .charmap "べ", 0x49 - .charmap "ぼ", 0x4A - .charmap "ぱ", 0x4B - .charmap "ぴ", 0x4C - .charmap "ぷ", 0x4D - .charmap "ぺ", 0x4E - .charmap "ぽ", 0x4F - .charmap "っ", 0x50 - -@ Katakana - .charmap "ア", 0x51 - .charmap "イ", 0x52 - .charmap "ウ", 0x53 - .charmap "エ", 0x54 - .charmap "オ", 0x55 - .charmap "カ", 0x56 - .charmap "キ", 0x57 - .charmap "ク", 0x58 - .charmap "ケ", 0x59 - .charmap "コ", 0x5A - .charmap "サ", 0x5B - 
.charmap "シ", 0x5C - .charmap "ス", 0x5D - .charmap "セ", 0x5E - .charmap "ソ", 0x5F - .charmap "タ", 0x60 - .charmap "チ", 0x61 - .charmap "ツ", 0x62 - .charmap "テ", 0x63 - .charmap "ト", 0x64 - .charmap "ナ", 0x65 - .charmap "ニ", 0x66 - .charmap "ヌ", 0x67 - .charmap "ネ", 0x68 - .charmap "ノ", 0x69 - .charmap "ハ", 0x6A - .charmap "ヒ", 0x6B - .charmap "フ", 0x6C - .charmap "ヘ", 0x6D - .charmap "ホ", 0x6E - .charmap "マ", 0x6F - .charmap "ミ", 0x70 - .charmap "ム", 0x71 - .charmap "メ", 0x72 - .charmap "モ", 0x73 - .charmap "ヤ", 0x74 - .charmap "ユ", 0x75 - .charmap "ヨ", 0x76 - .charmap "ラ", 0x77 - .charmap "リ", 0x78 - .charmap "ル", 0x79 - .charmap "レ", 0x7A - .charmap "ロ", 0x7B - .charmap "ワ", 0x7C - .charmap "ヲ", 0x7D - .charmap "ン", 0x7E - .charmap "ァ", 0x7F - .charmap "ィ", 0x80 - .charmap "ゥ", 0x81 - .charmap "ェ", 0x82 - .charmap "ォ", 0x83 - .charmap "ャ", 0x84 - .charmap "ュ", 0x85 - .charmap "ョ", 0x86 - .charmap "ガ", 0x87 - .charmap "ギ", 0x88 - .charmap "グ", 0x89 - .charmap "ゲ", 0x8A - .charmap "ゴ", 0x8B - .charmap "ザ", 0x8C - .charmap "ジ", 0x8D - .charmap "ズ", 0x8E - .charmap "ゼ", 0x8F - .charmap "ゾ", 0x90 - .charmap "ダ", 0x91 - .charmap "ヂ", 0x92 - .charmap "ヅ", 0x93 - .charmap "デ", 0x94 - .charmap "ド", 0x95 - .charmap "バ", 0x96 - .charmap "ビ", 0x97 - .charmap "ブ", 0x98 - .charmap "ベ", 0x99 - .charmap "ボ", 0x9A - .charmap "パ", 0x9B - .charmap "ピ", 0x9C - .charmap "プ", 0x9D - .charmap "ペ", 0x9E - .charmap "ポ", 0x9F - .charmap "ッ", 0xA0 - - .charmap "ー", 0xAE - - .charmap_const PK, 0x53 - .charmap_const PKMN, 0x53, 0x54 - .charmap_const POKEBLOCK, 0x55, 0x56, 0x57, 0x58, 0x59 - - @ indicates the end of a town/city name (before " TOWN" or " CITY") - .charmap_const NAME_END, 0xFC, 0x00 - -@ string placeholders - @ unknown (0xFD, 0x00) - .charmap_const PLAYER, 0xFD, 0x01 - .charmap_const STR_VAR_1, 0xFD, 0x02 - .charmap_const STR_VAR_2, 0xFD, 0x03 - .charmap_const STR_VAR_3, 0xFD, 0x04 - @ unknown (0xFD, 0x05) - .charmap_const RIVAL, 0xFD, 0x06 -@ version-dependent strings - 
.charmap_const VERSION, 0xFD, 0x07 @ "RUBY" / "SAPPHIRE" - .charmap_const EVIL_TEAM, 0xFD, 0x08 @ "MAGMA" / "AQUA" - .charmap_const GOOD_TEAM, 0xFD, 0x09 @ "AQUA" / "MAGMA" - .charmap_const EVIL_TEAM_LEADER, 0xFD, 0x0A @ "MAXIE" / "ARCHIE" - .charmap_const GOOD_TEAM_LEADER, 0xFD, 0x0B @ "ARCHIE" / "MAXIE" - .charmap_const CUR_VERSION_MASCOT, 0xFD, 0x0C @ "GROUDON" / "KYOGRE" - .charmap_const OPP_VERSION_MASCOT, 0xFD, 0x0D @ "KYOGRE" / "GROUDON" - - .charmap_escape 'l', 0xFA @ scroll up window text - .charmap_escape 'p', 0xFB @ new paragraph - .charmap_escape 'n', 0xFE @ new line diff --git a/charmap.txt b/charmap.txt new file mode 100644 index 000000000..0bd2b7c9d --- /dev/null +++ b/charmap.txt @@ -0,0 +1,280 @@ +' ' = 00 +'é' = 1B +'&' = 2D +'%' = 5B +'(' = 5C +')' = 5D +'0' = A1 +'1' = A2 +'2' = A3 +'3' = A4 +'4' = A5 +'5' = A6 +'6' = A7 +'7' = A8 +'8' = A9 +'9' = AA +'!' = AB +'?' = AC +'.' = AD +'-' = AE +'·' = AF +'…' = B0 +'“' = B1 +'”' = B2 +'‘' = B3 +'’' = B4 +'♂' = B5 +'♀' = B6 +'¥' = B7 +',' = B8 +'×' = B9 +'/' = BA +'A' = BB +'B' = BC +'C' = BD +'D' = BE +'E' = BF +'F' = C0 +'G' = C1 +'H' = C2 +'I' = C3 +'J' = C4 +'K' = C5 +'L' = C6 +'M' = C7 +'N' = C8 +'O' = C9 +'P' = CA +'Q' = CB +'R' = CC +'S' = CD +'T' = CE +'U' = CF +'V' = D0 +'W' = D1 +'X' = D2 +'Y' = D3 +'Z' = D4 +'a' = D5 +'b' = D6 +'c' = D7 +'d' = D8 +'e' = D9 +'f' = DA +'g' = DB +'h' = DC +'i' = DD +'j' = DE +'k' = DF +'l' = E0 +'m' = E1 +'n' = E2 +'o' = E3 +'p' = E4 +'q' = E5 +'r' = E6 +'s' = E7 +'t' = E8 +'u' = E9 +'v' = EA +'w' = EB +'x' = EC +'y' = ED +'z' = EE +':' = F0 +'$' = FF + +@ Hiragana +'あ' = 01 +'い' = 02 +'う' = 03 +'え' = 04 +'お' = 05 +'か' = 06 +'き' = 07 +'く' = 08 +'け' = 09 +'こ' = 0A +'さ' = 0B +'し' = 0C +'す' = 0D +'せ' = 0E +'そ' = 0F +'た' = 10 +'ち' = 11 +'つ' = 12 +'て' = 13 +'と' = 14 +'な' = 15 +'に' = 16 +'ぬ' = 17 +'ね' = 18 +'の' = 19 +'は' = 1A +'ひ' = 1B +'ふ' = 1C +'へ' = 1D +'ほ' = 1E +'ま' = 1F +'み' = 20 +'む' = 21 +'め' = 22 +'も' = 23 +'や' = 24 +'ゆ' = 25 +'よ' = 26 +'ら' = 27 +'り' = 28 
+'る' = 29 +'れ' = 2A +'ろ' = 2B +'わ' = 2C +'を' = 2D +'ん' = 2E +'ぁ' = 2F +'ぃ' = 30 +'ぅ' = 31 +'ぇ' = 32 +'ぉ' = 33 +'ゃ' = 34 +'ゅ' = 35 +'ょ' = 36 +'が' = 37 +'ぎ' = 38 +'ぐ' = 39 +'げ' = 3A +'ご' = 3B +'ざ' = 3C +'じ' = 3D +'ず' = 3E +'ぜ' = 3F +'ぞ' = 40 +'だ' = 41 +'ぢ' = 42 +'づ' = 43 +'で' = 44 +'ど' = 45 +'ば' = 46 +'び' = 47 +'ぶ' = 48 +'べ' = 49 +'ぼ' = 4A +'ぱ' = 4B +'ぴ' = 4C +'ぷ' = 4D +'ぺ' = 4E +'ぽ' = 4F +'っ' = 50 + +@ Katakana +'ア' = 51 +'イ' = 52 +'ウ' = 53 +'エ' = 54 +'オ' = 55 +'カ' = 56 +'キ' = 57 +'ク' = 58 +'ケ' = 59 +'コ' = 5A +'サ' = 5B +'シ' = 5C +'ス' = 5D +'セ' = 5E +'ソ' = 5F +'タ' = 60 +'チ' = 61 +'ツ' = 62 +'テ' = 63 +'ト' = 64 +'ナ' = 65 +'ニ' = 66 +'ヌ' = 67 +'ネ' = 68 +'ノ' = 69 +'ハ' = 6A +'ヒ' = 6B +'フ' = 6C +'ヘ' = 6D +'ホ' = 6E +'マ' = 6F +'ミ' = 70 +'ム' = 71 +'メ' = 72 +'モ' = 73 +'ヤ' = 74 +'ユ' = 75 +'ヨ' = 76 +'ラ' = 77 +'リ' = 78 +'ル' = 79 +'レ' = 7A +'ロ' = 7B +'ワ' = 7C +'ヲ' = 7D +'ン' = 7E +'ァ' = 7F +'ィ' = 80 +'ゥ' = 81 +'ェ' = 82 +'ォ' = 83 +'ャ' = 84 +'ュ' = 85 +'ョ' = 86 +'ガ' = 87 +'ギ' = 88 +'グ' = 89 +'ゲ' = 8A +'ゴ' = 8B +'ザ' = 8C +'ジ' = 8D +'ズ' = 8E +'ゼ' = 8F +'ゾ' = 90 +'ダ' = 91 +'ヂ' = 92 +'ヅ' = 93 +'デ' = 94 +'ド' = 95 +'バ' = 96 +'ビ' = 97 +'ブ' = 98 +'ベ' = 99 +'ボ' = 9A +'パ' = 9B +'ピ' = 9C +'プ' = 9D +'ペ' = 9E +'ポ' = 9F +'ッ' = A0 + +'ー' = AE + +PK = 53 +PKMN = 53 54 +POKEBLOCK = 55 56 57 58 59 + +@ indicates the end of a town/city name (before " TOWN" or " CITY") +NAME_END = FC 00 + +@ string placeholders +@ unknown (FD 00) +PLAYER = FD 01 +STR_VAR_1 = FD 02 +STR_VAR_2 = FD 03 +STR_VAR_3 = FD 04 +@ unknown (FD 05) +RIVAL = FD 06 +@ version-dependent strings +VERSION = FD 07 @ "RUBY" / "SAPPHIRE" +EVIL_TEAM = FD 08 @ "MAGMA" / "AQUA" +GOOD_TEAM = FD 09 @ "AQUA" / "MAGMA" +EVIL_TEAM_LEADER = FD 0A @ "MAXIE" / "ARCHIE" +GOOD_TEAM_LEADER = FD 0B @ "ARCHIE" / "MAXIE" +CUR_VERSION_MASCOT = FD 0C @ "GROUDON" / "KYOGRE" +OPP_VERSION_MASCOT = FD 0D @ "KYOGRE" / "GROUDON" + +'\l' = FA @ scroll up window text +'\p' = FB @ new paragraph +'\n' = FE @ new line diff --git a/data/data1.s b/data/data1.s index 
ebfa367a5..6428dbac6 100644 --- a/data/data1.s +++ b/data/data1.s @@ -2,7 +2,6 @@ .include "asm/macros.s" .include "constants/constants.s" - .include "asm/charmap.s" .section script_data, "aw", %progbits diff --git a/data/data2.s b/data/data2.s index cdddccb8f..d0a3611c5 100644 --- a/data/data2.s +++ b/data/data2.s @@ -2,7 +2,6 @@ .include "asm/macros.s" .include "constants/constants.s" - .include "asm/charmap.s" .section .rodata diff --git a/fix_local_labels.pl b/fix_local_labels.pl deleted file mode 100644 index d3258c766..000000000 --- a/fix_local_labels.pl +++ /dev/null @@ -1,28 +0,0 @@ -use strict; -use warnings; - -open(IN_FILE, $ARGV[0]); -open(OUT_FILE, ">", $ARGV[1]); - -my @labels = (); - -while (<IN_FILE>) { - if ($_ =~ /^\.(.+):/) { - push(@labels, $1); - } -} - -seek IN_FILE, 0, 0; - -while (<IN_FILE>) { - for (my $i = 0; $i < scalar(@labels); $i++) { - my $find = quotemeta '.' . $labels[$i]; - my $replace = '$' . $labels[$i]; - $_ =~ s/$find/$replace/; - } - - print OUT_FILE $_; -} - -close(IN_FILE); -close(OUT_FILE); diff --git a/tools/preproc/.gitignore b/tools/preproc/.gitignore new file mode 100644 index 000000000..eb3470879 --- /dev/null +++ b/tools/preproc/.gitignore @@ -0,0 +1 @@ +preproc diff --git a/tools/preproc/LICENSE b/tools/preproc/LICENSE new file mode 100644 index 000000000..534d15349 --- /dev/null +++ b/tools/preproc/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2016 YamaArashi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/tools/preproc/Makefile b/tools/preproc/Makefile new file mode 100644 index 000000000..10f930ebd --- /dev/null +++ b/tools/preproc/Makefile @@ -0,0 +1,15 @@ +CXX := g++ + +CXXFLAGS := -std=c++14 -O2 + +SRCS := asm_file.cpp charmap.cpp preproc.cpp utf8.cpp + +HEADERS := asm_file.h char_util.h charmap.h preproc.h utf8.h + +.PHONY: clean + +preproc: $(SRCS) $(HEADERS) + $(CXX) $(CXXFLAGS) $(SRCS) -o $@ + +clean: + $(RM) preproc preproc.exe diff --git a/tools/preproc/asm_file.cpp b/tools/preproc/asm_file.cpp new file mode 100644 index 000000000..0b42d9ab2 --- /dev/null +++ b/tools/preproc/asm_file.cpp @@ -0,0 +1,563 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <cstdio> +#include <cstdarg> +#include "preproc.h" +#include "asm_file.h" +#include "char_util.h" +#include "utf8.h" + +AsmFile::AsmFile(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; + m_lineStart = 0; + + RemoveComments(); +} + +AsmFile::AsmFile(AsmFile&& other) : m_filename(std::move(other.m_filename)) +{ + m_buffer = other.m_buffer; + m_pos = other.m_pos; + m_size = other.m_size; + m_lineNum = other.m_lineNum; + m_lineStart = other.m_lineStart; + + other.m_buffer = nullptr; +} + +AsmFile::~AsmFile() +{ + delete[] m_buffer; +} + +// Removes comments to simplify further processing. +// It stops upon encountering a null character, +// which may or may not be the end of file marker. +// If it's not, the error will be caught later. 
+void AsmFile::RemoveComments() +{ + long pos = 0; + bool inString = false; + char stringChar; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (inString) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar) + { + pos += 2; + } + else + { + if (m_buffer[pos] == stringChar) + inString = false; + pos++; + } + } + else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\')) + { + while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) + m_buffer[pos++] = ' '; + } + else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*') + { + m_buffer[pos++] = ' '; + m_buffer[pos++] = ' '; + + bool inCommentString = false; + char commentStringChar; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (inCommentString) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == commentStringChar) + { + m_buffer[pos++] = ' '; + m_buffer[pos++] = ' '; + } + else + { + if (m_buffer[pos] == commentStringChar) + inCommentString = false; + if (m_buffer[pos] != '\n') + m_buffer[pos] = ' '; + pos++; + } + } + else + { + if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/') + { + m_buffer[pos++] = ' '; + m_buffer[pos++] = ' '; + break; + } + else + { + if (m_buffer[pos] == '"' || m_buffer[pos] == '\'') + { + commentStringChar = m_buffer[pos]; + inCommentString = true; + } + if (m_buffer[pos] != '\n') + m_buffer[pos] = ' '; + pos++; + } + } + } + } + else + { + if (m_buffer[pos] == '"' || m_buffer[pos] == '\'') + { + stringChar = m_buffer[pos]; + inString = true; + } + pos++; + } + } +} + +// Checks if we're at a particular directive and if so, consumes it. +// Returns whether the directive was found. 
+bool AsmFile::CheckForDirective(std::string name) +{ + long i; + long length = static_cast<long>(name.length()); + + for (i = 0; i < length && m_pos + i < m_size; i++) + if (name[i] != m_buffer[m_pos + i]) + return false; + + if (i < length) + return false; + + m_pos += length; + + return true; +} + +// Checks if we're at a known directive and if so, consumes it. +// Returns which directive was found. +Directive AsmFile::GetDirective() +{ + SkipWhitespace(); + + if (CheckForDirective(".include")) + return Directive::Include; + else if (CheckForDirective(".string")) + return Directive::String; + else + return Directive::Unknown; +} + +// Skips tabs and spaces. +void AsmFile::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} + +// Reads include path. +std::string AsmFile::ReadPath() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '"') + RaiseError("expected file path"); + + m_pos++; + + int length = 0; + long startPos = m_pos; + + while (m_buffer[m_pos] != '"') + { + unsigned char c = m_buffer[m_pos++]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in include string"); + else + RaiseError("unexpected null character in include string"); + } + + if (!IsAsciiPrintable(c)) + RaiseError("unexpected character '\\x%02X' in include string", c); + + // Don't bother allowing any escape sequences. + if (c == '\\') + { + c = m_buffer[m_pos]; + RaiseError("unexpected escape '\\%c' in include string", c); + } + + length++; + + if (length > kMaxPath) + RaiseError("path is too long"); + } + + m_pos++; // Go past the right quote. + + ExpectEmptyRestOfLine(); + + return std::string(&m_buffer[startPos], length); +} + +// Reads a charmap char or escape sequence. 
+std::string AsmFile::ReadCharOrEscape() +{ + std::string sequence; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + + if (m_buffer[m_pos] == '"') + { + sequence = g_charmap->Char('"'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for double quote"); + + return sequence; + } + else if (m_buffer[m_pos] == '\\') + { + sequence = g_charmap->Char('\\'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for backslash"); + + return sequence; + } + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 string"); + else + RaiseError("unexpected null character in UTF-8 string"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 string", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + m_pos += unicodeChar.encodingLength; + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 string"); + + if (isEscape && code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); + + if (sequence.length() == 0) + { + if (isEscape) + RaiseError("unknown escape '\\%c'", code); + else + RaiseError("unknown character U+%X", code); + } + + return sequence; +} + +// Reads a charmap constant, i.e. "{FOO}". +std::string AsmFile::ReadConstant() +{ + m_pos++; // Assume we're on the left curly bracket. 
+ + long startPos = m_pos; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + if (m_buffer[m_pos] != '}') + { + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in identifier"); + else + RaiseError("unexpected null character in identifier"); + } + + RaiseError("unexpected character in identifier"); + } + + std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); + + if (sequence.length() == 0) + { + m_buffer[m_pos] = 0; + RaiseError("unknown constant '%s'", &m_buffer[startPos]); + } + + m_pos++; // Go past the right curly bracket. + + return sequence; +} + +// Reads a charmap string. +int AsmFile::ReadString(unsigned char* s) +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '"') + RaiseError("expected UTF-8 string literal"); + + m_pos++; + + int length = 0; + + while (m_buffer[m_pos] != '"') + { + std::string sequence = (m_buffer[m_pos] == '{') ? ReadConstant() : ReadCharOrEscape(); + + for (const char& c : sequence) + { + if (length == kMaxStringLength) + RaiseError("mapped string longer than %d bytes", length); + + s[length++] = c; + } + } + + m_pos++; // Go past the right quote. + + SkipWhitespace(); + + if (ConsumeComma()) + { + SkipWhitespace(); + int padLength = ReadPadLength(); + + while (length < padLength) + { + s[length++] = 0; + } + } + + ExpectEmptyRestOfLine(); + + return length; +} + +// If we're at a comma, consumes it. +// Returns whether a comma was found. +bool AsmFile::ConsumeComma() +{ + if (m_buffer[m_pos] == ',') + { + m_pos++; + return true; + } + + return false; +} + +// Converts digit character to numerical value. +static int ConvertDigit(char c, int radix) +{ + int digit; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + return -1; + + return (digit < radix) ? digit : -1; +} + +// Reads the pad length for a charmap string. 
+int AsmFile::ReadPadLength() +{ + if (!IsAsciiDigit(m_buffer[m_pos])) + RaiseError("expected integer"); + + int radix = 10; + + if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') + { + radix = 16; + m_pos += 2; + } + + int n = 0; + int digit; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n > kMaxStringLength) + RaiseError("pad length greater than maximum length (%d)", kMaxStringLength); + + m_pos++; + } + + return n; +} + +// Outputs the current line and moves to the next one. +void AsmFile::OutputLine() +{ + while (m_buffer[m_pos] != '\n' && m_buffer[m_pos] != 0) + m_pos++; + + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + { + RaiseWarning("file doesn't end with newline"); + puts(&m_buffer[m_lineStart]); + } + else + { + RaiseError("unexpected null character"); + } + } + else + { + m_buffer[m_pos] = 0; + puts(&m_buffer[m_lineStart]); + m_buffer[m_pos] = '\n'; + m_pos++; + m_lineStart = m_pos; + m_lineNum++; + } +} + +// Asserts that the rest of the line is empty and moves to the next one. +void AsmFile::ExpectEmptyRestOfLine() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseWarning("file doesn't end with newline"); + else + RaiseError("unexpected null character"); + } + else if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineStart = m_pos; + m_lineNum++; + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else + { + RaiseError("junk at end of line"); + } +} + +// Checks if we're at the end of the file. +bool AsmFile::IsAtEnd() +{ + return (m_pos >= m_size); +} + +// Output the current location to set gas's logical file and line numbers. +void AsmFile::OutputLocation() +{ + printf("# %ld \"%s\"\n", m_lineNum, m_filename.c_str()); +} + +// Reports a diagnostic message. 
+void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + std::vsnprintf(buffer, bufferSize, format, args); + std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); +} + +#define DO_REPORT(type) \ +do \ +{ \ + std::va_list args; \ + va_start(args, format); \ + ReportDiagnostic(type, format, args); \ + va_end(args); \ +} while (0) + +// Reports an error diagnostic and terminates the program. +void AsmFile::RaiseError(const char* format, ...) +{ + DO_REPORT("error"); + exit(1); +} + +// Reports a warning diagnostic. +void AsmFile::RaiseWarning(const char* format, ...) +{ + DO_REPORT("warning"); +} diff --git a/tools/preproc/asm_file.h b/tools/preproc/asm_file.h new file mode 100644 index 000000000..1c137d7d3 --- /dev/null +++ b/tools/preproc/asm_file.h @@ -0,0 +1,71 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef ASM_FILE_H +#define ASM_FILE_H + +#include <cstdarg> +#include <cstdint> +#include <string> +#include "preproc.h" + +enum class Directive +{ + Include, + String, + Unknown +}; + +class AsmFile +{ +public: + AsmFile(std::string filename); + AsmFile(AsmFile&& other); + AsmFile(const AsmFile&) = delete; + ~AsmFile(); + Directive GetDirective(); + std::string ReadPath(); + int ReadString(unsigned char* s); + bool IsAtEnd(); + void OutputLine(); + void OutputLocation(); + +private: + char* m_buffer; + long m_pos; + long m_size; + long m_lineNum; + long m_lineStart; + std::string m_filename; + + bool ConsumeComma(); + int ReadPadLength(); + void RemoveComments(); + bool CheckForDirective(std::string name); + std::string ReadCharOrEscape(); + std::string ReadConstant(); + void SkipWhitespace(); + void ExpectEmptyRestOfLine(); + void ReportDiagnostic(const char* type, const char* format, std::va_list args); + void RaiseError(const char* format, ...); + void RaiseWarning(const char *format, ...); +}; + +#endif // ASM_FILE_H diff --git a/tools/preproc/char_util.h b/tools/preproc/char_util.h new file mode 100644 index 000000000..05f9a1dd4 --- /dev/null +++ b/tools/preproc/char_util.h @@ -0,0 +1,65 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// 
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef CHAR_UTIL_H
#define CHAR_UTIL_H

#include <cstdint>
#include <cassert>

// True when the byte lies in the 7-bit ASCII range (0..127).
inline bool IsAscii(unsigned char c)
{
    return (c & 0x80) == 0;
}

// True for the ASCII letters 'A'..'Z' and 'a'..'z'.
inline bool IsAsciiAlpha(unsigned char c)
{
    // Everything from 'a' upwards can only match the lower-case range.
    if (c >= 'a')
        return c <= 'z';

    return c >= 'A' && c <= 'Z';
}

// True for the ASCII decimal digits '0'..'9'.
inline bool IsAsciiDigit(unsigned char c)
{
    return !(c < '0' || c > '9');
}

// True for ASCII hexadecimal digits in either case.
inline bool IsAsciiHexDigit(unsigned char c)
{
    if (c >= '0' && c <= '9')
        return true;

    if (c >= 'A' && c <= 'F')
        return true;

    return c >= 'a' && c <= 'f';
}

// True for ASCII letters and decimal digits.
inline bool IsAsciiAlphanum(unsigned char c)
{
    if (IsAsciiDigit(c))
        return true;

    return IsAsciiAlpha(c);
}

// True for the printable ASCII range, space (0x20) through tilde (0x7E).
inline bool IsAsciiPrintable(unsigned char c)
{
    return c > 0x1F && c < 0x7F;
}

// Tells whether "c" may appear in the identifier of a "{FOO}" constant in strings:
// ASCII letters, digits and the underscore.
inline bool IsIdentifierChar(unsigned char c)
{
    return c == '_' || IsAsciiAlphanum(c);
}

#endif // CHAR_UTIL_H
// diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp
// new file mode 100644
// index 000000000..de693eda5
// --- /dev/null
// +++ b/tools/preproc/charmap.cpp
// @@ -0,0 +1,397 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+ +#include <cstdio> +#include <cstdint> +#include <cstdarg> +#include "preproc.h" +#include "charmap.h" +#include "char_util.h" +#include "utf8.h" + +enum LhsType +{ + Char, + Escape, + Constant, + None +}; + +struct Lhs +{ + LhsType type; + std::string name; + std::int32_t code; +}; + +class CharmapReader +{ +public: + CharmapReader(std::string filename); + CharmapReader(const CharmapReader&) = delete; + ~CharmapReader(); + Lhs ReadLhs(); + void ExpectEqualsSign(); + std::string ReadSequence(); + void ExpectEmptyRestOfLine(); + +private: + char* m_buffer; + long m_pos; + long m_size; + long m_lineNum; + std::string m_filename; + + void RaiseError(const char* format, ...); + void RemoveComments(); + std::string ReadConstant(); + void SkipWhitespace(); +}; + +CharmapReader::CharmapReader(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; + + RemoveComments(); +} + +CharmapReader::~CharmapReader() +{ + delete[] m_buffer; +} + +Lhs CharmapReader::ReadLhs() +{ + Lhs lhs; + + for (;;) + { + SkipWhitespace(); + + if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else + { + break; + } + } + + if (m_buffer[m_pos] == '\'') + { + m_pos++; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 character literal"); + else + RaiseError("unexpected null character in UTF-8 character literal"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character 
U+%X in UTF-8 character literal", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 character literal"); + + m_pos += unicodeChar.encodingLength; + + if (m_buffer[m_pos] != '\'') + RaiseError("unterminated character literal"); + + m_pos++; + + lhs.code = code; + + if (isEscape) + { + if (code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + switch (code) + { + case '\'': + lhs.type = LhsType::Char; + break; + case '\\': + lhs.type = LhsType::Char; + case '"': + RaiseError("cannot escape double quote"); + break; + default: + lhs.type = LhsType::Escape; + } + } + else + { + if (code == '\'') + RaiseError("empty character literal"); + + lhs.type = LhsType::Char; + } + } + else if (IsIdentifierChar(m_buffer[m_pos])) + { + lhs.type = LhsType::Constant; + lhs.name = ReadConstant(); + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + lhs.type = LhsType::None; + } + else + { + RaiseError("junk at start of line"); + } + + return lhs; +} + +void CharmapReader::ExpectEqualsSign() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '=') + RaiseError("expected equals sign"); + + m_pos++; +} + +static unsigned int ConvertHexDigit(char c) +{ + unsigned int digit = 0; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + + return digit; +} + +std::string CharmapReader::ReadSequence() +{ + SkipWhitespace(); + + long startPos = m_pos; + + unsigned int length = 0; + + while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1])) + { + m_pos += 2; + length++; + + if (length > kMaxCharmapSequenceLength) + RaiseError("byte sequence too long (max is %lu bytes)", 
kMaxCharmapSequenceLength); + + SkipWhitespace(); + } + + if (IsAsciiHexDigit(m_buffer[m_pos])) + RaiseError("each byte must have 2 hex digits"); + + if (length == 0) + RaiseError("expected byte sequence"); + + std::string sequence; + sequence.reserve(length); + + m_pos = startPos; + + for (unsigned int i = 0; i < length; i++) + { + unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]); + unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]); + unsigned char byte = digit1 * 16 + digit2; + sequence += byte; + + m_pos += 2; + SkipWhitespace(); + } + + return sequence; +} + +void CharmapReader::ExpectEmptyRestOfLine() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + } + else if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else + { + RaiseError("junk at end of line"); + } +} + +void CharmapReader::RaiseError(const char* format, ...) 
+{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer); +} + +void CharmapReader::RemoveComments() +{ + long pos = 0; + bool inString = false; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (inString) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'') + { + pos += 2; + } + else + { + if (m_buffer[pos] == '\'') + inString = false; + pos++; + } + } + else if (m_buffer[pos] == '@') + { + while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) + m_buffer[pos++] = ' '; + } + else + { + if (m_buffer[pos] == '\'') + inString = true; + pos++; + } + } +} + +std::string CharmapReader::ReadConstant() +{ + long startPos = m_pos; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + return std::string(&m_buffer[startPos], m_pos - startPos); +} + +void CharmapReader::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} + +Charmap::Charmap(std::string filename) +{ + CharmapReader reader(filename); + + for (;;) + { + Lhs lhs = reader.ReadLhs(); + + if (lhs.type == LhsType::None) + return; + + reader.ExpectEqualsSign(); + + std::string sequence = reader.ReadSequence(); + + switch (lhs.type) + { + case LhsType::Char: + m_chars[lhs.code] = sequence; + break; + case LhsType::Escape: + m_escapes[lhs.code] = sequence; + break; + case LhsType::Constant: + m_constants[lhs.name] = sequence; + break; + } + + reader.ExpectEmptyRestOfLine(); + } +} diff --git a/tools/preproc/charmap.h b/tools/preproc/charmap.h new file mode 100644 index 000000000..0d752ac99 --- /dev/null +++ b/tools/preproc/charmap.h @@ -0,0 +1,64 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in 
// the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef CHARMAP_H
#define CHARMAP_H

#include <cstdint>
#include <string>
#include <map>
#include <vector>

// Lookup tables loaded from a charmap file. Each lookup returns the byte
// sequence mapped to the key, or an empty string when there is no mapping.
class Charmap
{
public:
    // Parses "filename"; defined in charmap.cpp.
    Charmap(std::string filename);

    // Byte sequence for a character, keyed by Unicode code point.
    std::string Char(std::int32_t code)
    {
        auto it = m_chars.find(code);

        if (it == m_chars.end())
            return std::string();

        return it->second;
    }

    // Byte sequence for an escape, keyed by the character after the backslash.
    std::string Escape(unsigned char code)
    {
        // "code" can be as large as 255 but the table only covers ASCII;
        // anything outside the table has no mapping.
        if (code >= kNumEscapes)
            return std::string();

        return m_escapes[code];
    }

    // Byte sequence for a named constant.
    std::string Constant(std::string identifier)
    {
        auto it = m_constants.find(identifier);

        if (it == m_constants.end())
            return std::string();

        return it->second;
    }
private:
    // Number of entries in m_escapes (one per ASCII character).
    static const unsigned int kNumEscapes = 128;

    std::map<std::int32_t, std::string> m_chars;
    std::string m_escapes[128];
    std::map<std::string, std::string> m_constants;
};

#endif // CHARMAP_H
// diff --git a/tools/preproc/preproc.cpp b/tools/preproc/preproc.cpp
// new file mode 100644
// index 000000000..7fc3f5c10
// --- /dev/null
// +++ b/tools/preproc/preproc.cpp
// @@ -0,0 +1,83 @@
// Copyright(c) 2016 YamaArashi
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and
associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <stack> +#include "preproc.h" +#include "asm_file.h" +#include "charmap.h" + +Charmap* g_charmap; + +int main(int argc, char **argv) +{ + if (argc != 3) + { + fprintf(stderr, "Usage: %s ASM_FILE CHARMAP_FILE", argv[0]); + return 1; + } + + g_charmap = new Charmap(argv[2]); + + std::stack<AsmFile> stack; + + stack.push(AsmFile(argv[1])); + + for (;;) + { + while (stack.top().IsAtEnd()) + { + stack.pop(); + + if (stack.empty()) + return 0; + else + stack.top().OutputLocation(); + } + + Directive directive = stack.top().GetDirective(); + + switch (directive) + { + case Directive::Include: + stack.push(AsmFile(stack.top().ReadPath())); + stack.top().OutputLocation(); + break; + case Directive::String: + { + unsigned char s[kMaxStringLength]; + int length = stack.top().ReadString(s); + + printf("\t.byte "); + for (int i = 0; i < length; i++) + { + printf("0x%02X", s[i]); + + if (i < length - 1) + printf(", "); + } + putchar('\n'); + break; + } + case Directive::Unknown: + 
stack.top().OutputLine(); + break; + } + } +} diff --git a/tools/preproc/preproc.h b/tools/preproc/preproc.h new file mode 100644 index 000000000..c9e1a8414 --- /dev/null +++ b/tools/preproc/preproc.h @@ -0,0 +1,52 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef PREPROC_H +#define PREPROC_H + +#include "charmap.h" + +#ifdef _MSC_VER + +#define FATAL_ERROR(format, ...) \ +do \ +{ \ + fprintf(stderr, format, __VA_ARGS__); \ + exit(1); \ +} while (0) + +#else + +#define FATAL_ERROR(format, ...) 
\ +do \ +{ \ + fprintf(stderr, format, ##__VA_ARGS__); \ + exit(1); \ +} while (0) + +#endif // _MSC_VER + +const int kMaxPath = 256; +const int kMaxStringLength = 256; +const unsigned long kMaxCharmapSequenceLength = 16; + +extern Charmap* g_charmap; + +#endif // PREPROC_H diff --git a/tools/preproc/utf8.cpp b/tools/preproc/utf8.cpp new file mode 100644 index 000000000..0aed83f4a --- /dev/null +++ b/tools/preproc/utf8.cpp @@ -0,0 +1,94 @@ +// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. +// +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include <cstdint> +#include "utf8.h" + +static const unsigned char s_byteTypeTable[] = +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff +}; + +const unsigned char s0 = 0 * 12; +const unsigned char s1 = 1 * 12; +const unsigned char s2 = 2 * 12; +const unsigned char s3 = 3 * 12; +const unsigned char s4 = 4 * 12; +const unsigned char s5 = 5 * 12; +const unsigned char s6 = 6 * 12; +const unsigned char s7 = 7 * 12; +const unsigned char s8 = 8 * 12; + +static const unsigned char s_transitionTable[] = +{ + s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0 + s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1 + s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2 + s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3 + s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4 + s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5 + s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6 + s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7 + s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8 +}; + +// Decodes UTF-8 encoded Unicode code point at "s". +// If the encoding is valid, it returns the code point and advances "s" past the byte sequence. +// If the encoding is not valid, it returns -1 and doesn't advance "s". 
+UnicodeChar DecodeUtf8(const char* s) +{ + UnicodeChar unicodeChar; + int state = s0; + auto start = s; + + do + { + unsigned char byte = *s++; + int type = s_byteTypeTable[byte]; + + if (state == s0) + unicodeChar.code = (0xFF >> type) & byte; + else + unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F); + + state = s_transitionTable[state + type]; + + if (state == s1) + { + unicodeChar.code = -1; + return unicodeChar; + } + } while (state != s0); + + unicodeChar.encodingLength = s - start; + + return unicodeChar; +} diff --git a/tools/preproc/utf8.h b/tools/preproc/utf8.h new file mode 100644 index 000000000..259de67c5 --- /dev/null +++ b/tools/preproc/utf8.h @@ -0,0 +1,34 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 

#ifndef UTF8_H
#define UTF8_H

#include <cstdint>

// Result of DecodeUtf8().
struct UnicodeChar
{
    // Decoded Unicode code point, or -1 when the input is not valid UTF-8.
    std::int32_t code;
    // Number of bytes the encoded character occupies; meaningful only when code != -1.
    int encodingLength;
};

// Decodes the UTF-8 encoded code point that starts at "s" (defined in utf8.cpp).
UnicodeChar DecodeUtf8(const char* s);

#endif // UTF8_H |