summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author YamaArashi <shadow962@live.com> 2016-04-22 18:25:40 -0700
committer YamaArashi <shadow962@live.com> 2016-04-22 18:25:40 -0700
commit 0df6676bfccb1d68aa741997702d2a558110d322 (patch)
tree 440a7906ea6e75e17079ff1aed44900942b1e85a
parent 0601fb4d0013bd0f2508364b313acbdc371da674 (diff)
use preprocessor
-rw-r--r-- Makefile 45
-rw-r--r-- asm/charmap.s 280
-rw-r--r-- charmap.txt 280
-rw-r--r-- data/data1.s 1
-rw-r--r-- data/data2.s 1
-rw-r--r-- fix_local_labels.pl 28
-rw-r--r-- tools/preproc/.gitignore 1
-rw-r--r-- tools/preproc/LICENSE 19
-rw-r--r-- tools/preproc/Makefile 15
-rw-r--r-- tools/preproc/asm_file.cpp 563
-rw-r--r-- tools/preproc/asm_file.h 71
-rw-r--r-- tools/preproc/char_util.h 65
-rw-r--r-- tools/preproc/charmap.cpp 397
-rw-r--r-- tools/preproc/charmap.h 64
-rw-r--r-- tools/preproc/preproc.cpp 83
-rw-r--r-- tools/preproc/preproc.h 52
-rw-r--r-- tools/preproc/utf8.cpp 94
-rw-r--r-- tools/preproc/utf8.h 34
18 files changed, 1766 insertions, 327 deletions
diff --git a/Makefile b/Makefile
index ce077789c..8c36fb831 100644
--- a/Makefile
+++ b/Makefile
@@ -10,6 +10,8 @@ GFX := @tools/gbagfx/gbagfx
SCANINC := tools/scaninc/scaninc
+PREPROC := tools/preproc/preproc
+
# Clear the default suffixes.
.SUFFIXES:
@@ -20,19 +22,22 @@ SCANINC := tools/scaninc/scaninc
.PHONY: rom tools gbagfx scaninc clean compare deps
-CSRCS := $(wildcard src/*.c)
-OBJS := asm/crt0.o asm/rom1.o asm/rom2.o asm/rom3.o asm/rom4.o asm/rom5.o \
- asm/libgcnmultiboot.o asm/libmks4agb.o asm/libagbsyscall.o asm/libgcc.o \
- src/string_util.o src/rtc.o src/play_time.o src/task.o \
- src/agb_flash.o src/agb_flash_1m.o src/agb_flash_mx.o src/siirtc.o \
- data/data1.o data/data2.o
+C_SRCS := $(wildcard src/*.c)
+C_OBJS := $(C_SRCS:%.c=%.o)
+
+ASM_OBJS := asm/crt0.o asm/rom1.o asm/rom2.o asm/rom3.o asm/rom4.o asm/rom5.o \
+ asm/libgcnmultiboot.o asm/libmks4agb.o asm/libagbsyscall.o asm/libgcc.o
+
+DATA_ASM_OBJS := data/data1.o data/data2.o
+
+OBJS := $(C_OBJS) $(ASM_OBJS) $(DATA_ASM_OBJS)
ROM := pokeruby.gba
ELF := $(ROM:.gba=.elf)
rom: $(ROM)
-tools: gbagfx scaninc
+tools: gbagfx scaninc preproc
gbagfx:
cd tools/gbagfx && make
@@ -40,6 +45,9 @@ gbagfx:
scaninc:
cd tools/scaninc && make
+# Build the assembly preprocessor in its own directory.
+# $(MAKE) is used instead of a literal "make" so that -j, -n and the
+# jobserver are passed on to the sub-make.
+preproc:
+	$(MAKE) -C tools/preproc
+
# For contributors to make sure a change didn't affect the contents of the ROM.
compare: $(ROM)
@$(SHA1) rom.sha1
@@ -59,22 +67,25 @@ include tilesets.mk
%.gbapal: %.pal ; $(GFX) $< $@
%.lz: % ; $(GFX) $< $@
-$(OBJS): $(CSRCS:src/%.c=src/%.s)
+src/siirtc.o: CFLAGS := -mthumb-interwork -Iinclude
-src/siirtc.s: CFLAGS := -mthumb-interwork -Iinclude
+src/agb_flash.o: CFLAGS := -O -mthumb-interwork -Iinclude
+src/agb_flash_1m.o: CFLAGS := -O -mthumb-interwork -Iinclude
+src/agb_flash_mx.o: CFLAGS := -O -mthumb-interwork -Iinclude
-src/agb_flash.s: CFLAGS := -O -mthumb-interwork -Iinclude
-src/agb_flash_1m.s: CFLAGS := -O -mthumb-interwork -Iinclude
-src/agb_flash_mx.s: CFLAGS := -O -mthumb-interwork -Iinclude
+# Compile each C source to assembly, append a trailing .text/.align directive
+# pair to the generated file, then assemble that file into the object.
+# printf is used for the append because `echo -e` is not portable: a shell
+# whose echo does not understand -e would write a literal "-e" into the
+# assembly. The output is the same text, ending in one blank line.
+$(C_OBJS): %.o : %.c
+	$(CC) $(CFLAGS) -o $*.s $< -S
+	printf '.text\n\t.align\t2, 0\n\n' >> $*.s
+	$(AS) $(ASFLAGS) -o $@ $*.s
-src/%.s: src/%.c
- $(CC) $(CFLAGS) -o $@ $< -S
- echo -e ".text\n\t.align\t2, 0\n" >> $@
+%.o : dep = $(shell $(SCANINC) $*.s)
-%.o: dep = $(shell $(SCANINC) $(@D)/$*.s)
-%.o: %.s $$(dep)
+$(ASM_OBJS): %.o: %.s $$(dep)
$(AS) $(ASFLAGS) -o $@ $<
+# Data files are run through the preprocessor, which is given charmap.txt as
+# its second argument, and the result is piped straight into the assembler.
+# charmap.txt is listed as a prerequisite so that editing the character map
+# rebuilds the data objects; $< is still the .s file because it comes first.
+# NOTE(review): the exit status of the pipeline is that of $(AS), so a failure
+# in $(PREPROC) alone may go unnoticed unless the shell enables pipefail.
+$(DATA_ASM_OBJS): %.o: %.s charmap.txt $$(dep)
+	$(PREPROC) $< charmap.txt | $(AS) $(ASFLAGS) -o $@
+
# Link objects to produce the ROM.
$(ROM): $(OBJS)
./pokeld -T ld_script.txt -T iwram_syms.txt -T ewram_syms.txt -o $(ELF) $(OBJS)
diff --git a/asm/charmap.s b/asm/charmap.s
deleted file mode 100644
index 4b0e19917..000000000
--- a/asm/charmap.s
+++ /dev/null
@@ -1,280 +0,0 @@
- .charmap " ", 0x00
- .charmap "é", 0x1B
- .charmap "&", 0x2D
- .charmap "%", 0x5B
- .charmap "(", 0x5C
- .charmap ")", 0x5D
- .charmap "0", 0xA1
- .charmap "1", 0xA2
- .charmap "2", 0xA3
- .charmap "3", 0xA4
- .charmap "4", 0xA5
- .charmap "5", 0xA6
- .charmap "6", 0xA7
- .charmap "7", 0xA8
- .charmap "8", 0xA9
- .charmap "9", 0xAA
- .charmap "!", 0xAB
- .charmap "?", 0xAC
- .charmap ".", 0xAD
- .charmap "-", 0xAE
- .charmap "·", 0xAF
- .charmap "…", 0xB0
- .charmap "“", 0xB1
- .charmap "”", 0xB2
- .charmap "‘", 0xB3
- .charmap "’", 0xB4
- .charmap "♂", 0xB5
- .charmap "♀", 0xB6
- .charmap "¥", 0xB7
- .charmap ",", 0xB8
- .charmap "×", 0xB9
- .charmap "/", 0xBA
- .charmap "A", 0xBB
- .charmap "B", 0xBC
- .charmap "C", 0xBD
- .charmap "D", 0xBE
- .charmap "E", 0xBF
- .charmap "F", 0xC0
- .charmap "G", 0xC1
- .charmap "H", 0xC2
- .charmap "I", 0xC3
- .charmap "J", 0xC4
- .charmap "K", 0xC5
- .charmap "L", 0xC6
- .charmap "M", 0xC7
- .charmap "N", 0xC8
- .charmap "O", 0xC9
- .charmap "P", 0xCA
- .charmap "Q", 0xCB
- .charmap "R", 0xCC
- .charmap "S", 0xCD
- .charmap "T", 0xCE
- .charmap "U", 0xCF
- .charmap "V", 0xD0
- .charmap "W", 0xD1
- .charmap "X", 0xD2
- .charmap "Y", 0xD3
- .charmap "Z", 0xD4
- .charmap "a", 0xD5
- .charmap "b", 0xD6
- .charmap "c", 0xD7
- .charmap "d", 0xD8
- .charmap "e", 0xD9
- .charmap "f", 0xDA
- .charmap "g", 0xDB
- .charmap "h", 0xDC
- .charmap "i", 0xDD
- .charmap "j", 0xDE
- .charmap "k", 0xDF
- .charmap "l", 0xE0
- .charmap "m", 0xE1
- .charmap "n", 0xE2
- .charmap "o", 0xE3
- .charmap "p", 0xE4
- .charmap "q", 0xE5
- .charmap "r", 0xE6
- .charmap "s", 0xE7
- .charmap "t", 0xE8
- .charmap "u", 0xE9
- .charmap "v", 0xEA
- .charmap "w", 0xEB
- .charmap "x", 0xEC
- .charmap "y", 0xED
- .charmap "z", 0xEE
- .charmap ":", 0xF0
- .charmap "$", 0xFF
-
-@ Hiragana
- .charmap "あ", 0x01
- .charmap "い", 0x02
- .charmap "う", 0x03
- .charmap "え", 0x04
- .charmap "お", 0x05
- .charmap "か", 0x06
- .charmap "き", 0x07
- .charmap "く", 0x08
- .charmap "け", 0x09
- .charmap "こ", 0x0A
- .charmap "さ", 0x0B
- .charmap "し", 0x0C
- .charmap "す", 0x0D
- .charmap "せ", 0x0E
- .charmap "そ", 0x0F
- .charmap "た", 0x10
- .charmap "ち", 0x11
- .charmap "つ", 0x12
- .charmap "て", 0x13
- .charmap "と", 0x14
- .charmap "な", 0x15
- .charmap "に", 0x16
- .charmap "ぬ", 0x17
- .charmap "ね", 0x18
- .charmap "の", 0x19
- .charmap "は", 0x1A
- .charmap "ひ", 0x1B
- .charmap "ふ", 0x1C
- .charmap "へ", 0x1D
- .charmap "ほ", 0x1E
- .charmap "ま", 0x1F
- .charmap "み", 0x20
- .charmap "む", 0x21
- .charmap "め", 0x22
- .charmap "も", 0x23
- .charmap "や", 0x24
- .charmap "ゆ", 0x25
- .charmap "よ", 0x26
- .charmap "ら", 0x27
- .charmap "り", 0x28
- .charmap "る", 0x29
- .charmap "れ", 0x2A
- .charmap "ろ", 0x2B
- .charmap "わ", 0x2C
- .charmap "を", 0x2D
- .charmap "ん", 0x2E
- .charmap "ぁ", 0x2F
- .charmap "ぃ", 0x30
- .charmap "ぅ", 0x31
- .charmap "ぇ", 0x32
- .charmap "ぉ", 0x33
- .charmap "ゃ", 0x34
- .charmap "ゅ", 0x35
- .charmap "ょ", 0x36
- .charmap "が", 0x37
- .charmap "ぎ", 0x38
- .charmap "ぐ", 0x39
- .charmap "げ", 0x3A
- .charmap "ご", 0x3B
- .charmap "ざ", 0x3C
- .charmap "じ", 0x3D
- .charmap "ず", 0x3E
- .charmap "ぜ", 0x3F
- .charmap "ぞ", 0x40
- .charmap "だ", 0x41
- .charmap "ぢ", 0x42
- .charmap "づ", 0x43
- .charmap "で", 0x44
- .charmap "ど", 0x45
- .charmap "ば", 0x46
- .charmap "び", 0x47
- .charmap "ぶ", 0x48
- .charmap "べ", 0x49
- .charmap "ぼ", 0x4A
- .charmap "ぱ", 0x4B
- .charmap "ぴ", 0x4C
- .charmap "ぷ", 0x4D
- .charmap "ぺ", 0x4E
- .charmap "ぽ", 0x4F
- .charmap "っ", 0x50
-
-@ Katakana
- .charmap "ア", 0x51
- .charmap "イ", 0x52
- .charmap "ウ", 0x53
- .charmap "エ", 0x54
- .charmap "オ", 0x55
- .charmap "カ", 0x56
- .charmap "キ", 0x57
- .charmap "ク", 0x58
- .charmap "ケ", 0x59
- .charmap "コ", 0x5A
- .charmap "サ", 0x5B
- .charmap "シ", 0x5C
- .charmap "ス", 0x5D
- .charmap "セ", 0x5E
- .charmap "ソ", 0x5F
- .charmap "タ", 0x60
- .charmap "チ", 0x61
- .charmap "ツ", 0x62
- .charmap "テ", 0x63
- .charmap "ト", 0x64
- .charmap "ナ", 0x65
- .charmap "ニ", 0x66
- .charmap "ヌ", 0x67
- .charmap "ネ", 0x68
- .charmap "ノ", 0x69
- .charmap "ハ", 0x6A
- .charmap "ヒ", 0x6B
- .charmap "フ", 0x6C
- .charmap "ヘ", 0x6D
- .charmap "ホ", 0x6E
- .charmap "マ", 0x6F
- .charmap "ミ", 0x70
- .charmap "ム", 0x71
- .charmap "メ", 0x72
- .charmap "モ", 0x73
- .charmap "ヤ", 0x74
- .charmap "ユ", 0x75
- .charmap "ヨ", 0x76
- .charmap "ラ", 0x77
- .charmap "リ", 0x78
- .charmap "ル", 0x79
- .charmap "レ", 0x7A
- .charmap "ロ", 0x7B
- .charmap "ワ", 0x7C
- .charmap "ヲ", 0x7D
- .charmap "ン", 0x7E
- .charmap "ァ", 0x7F
- .charmap "ィ", 0x80
- .charmap "ゥ", 0x81
- .charmap "ェ", 0x82
- .charmap "ォ", 0x83
- .charmap "ャ", 0x84
- .charmap "ュ", 0x85
- .charmap "ョ", 0x86
- .charmap "ガ", 0x87
- .charmap "ギ", 0x88
- .charmap "グ", 0x89
- .charmap "ゲ", 0x8A
- .charmap "ゴ", 0x8B
- .charmap "ザ", 0x8C
- .charmap "ジ", 0x8D
- .charmap "ズ", 0x8E
- .charmap "ゼ", 0x8F
- .charmap "ゾ", 0x90
- .charmap "ダ", 0x91
- .charmap "ヂ", 0x92
- .charmap "ヅ", 0x93
- .charmap "デ", 0x94
- .charmap "ド", 0x95
- .charmap "バ", 0x96
- .charmap "ビ", 0x97
- .charmap "ブ", 0x98
- .charmap "ベ", 0x99
- .charmap "ボ", 0x9A
- .charmap "パ", 0x9B
- .charmap "ピ", 0x9C
- .charmap "プ", 0x9D
- .charmap "ペ", 0x9E
- .charmap "ポ", 0x9F
- .charmap "ッ", 0xA0
-
- .charmap "ー", 0xAE
-
- .charmap_const PK, 0x53
- .charmap_const PKMN, 0x53, 0x54
- .charmap_const POKEBLOCK, 0x55, 0x56, 0x57, 0x58, 0x59
-
- @ indicates the end of a town/city name (before " TOWN" or " CITY")
- .charmap_const NAME_END, 0xFC, 0x00
-
-@ string placeholders
- @ unknown (0xFD, 0x00)
- .charmap_const PLAYER, 0xFD, 0x01
- .charmap_const STR_VAR_1, 0xFD, 0x02
- .charmap_const STR_VAR_2, 0xFD, 0x03
- .charmap_const STR_VAR_3, 0xFD, 0x04
- @ unknown (0xFD, 0x05)
- .charmap_const RIVAL, 0xFD, 0x06
-@ version-dependent strings
- .charmap_const VERSION, 0xFD, 0x07 @ "RUBY" / "SAPPHIRE"
- .charmap_const EVIL_TEAM, 0xFD, 0x08 @ "MAGMA" / "AQUA"
- .charmap_const GOOD_TEAM, 0xFD, 0x09 @ "AQUA" / "MAGMA"
- .charmap_const EVIL_TEAM_LEADER, 0xFD, 0x0A @ "MAXIE" / "ARCHIE"
- .charmap_const GOOD_TEAM_LEADER, 0xFD, 0x0B @ "ARCHIE" / "MAXIE"
- .charmap_const CUR_VERSION_MASCOT, 0xFD, 0x0C @ "GROUDON" / "KYOGRE"
- .charmap_const OPP_VERSION_MASCOT, 0xFD, 0x0D @ "KYOGRE" / "GROUDON"
-
- .charmap_escape 'l', 0xFA @ scroll up window text
- .charmap_escape 'p', 0xFB @ new paragraph
- .charmap_escape 'n', 0xFE @ new line
diff --git a/charmap.txt b/charmap.txt
new file mode 100644
index 000000000..0bd2b7c9d
--- /dev/null
+++ b/charmap.txt
@@ -0,0 +1,280 @@
+' ' = 00
+'é' = 1B
+'&' = 2D
+'%' = 5B
+'(' = 5C
+')' = 5D
+'0' = A1
+'1' = A2
+'2' = A3
+'3' = A4
+'4' = A5
+'5' = A6
+'6' = A7
+'7' = A8
+'8' = A9
+'9' = AA
+'!' = AB
+'?' = AC
+'.' = AD
+'-' = AE
+'·' = AF
+'…' = B0
+'“' = B1
+'”' = B2
+'‘' = B3
+'’' = B4
+'♂' = B5
+'♀' = B6
+'¥' = B7
+',' = B8
+'×' = B9
+'/' = BA
+'A' = BB
+'B' = BC
+'C' = BD
+'D' = BE
+'E' = BF
+'F' = C0
+'G' = C1
+'H' = C2
+'I' = C3
+'J' = C4
+'K' = C5
+'L' = C6
+'M' = C7
+'N' = C8
+'O' = C9
+'P' = CA
+'Q' = CB
+'R' = CC
+'S' = CD
+'T' = CE
+'U' = CF
+'V' = D0
+'W' = D1
+'X' = D2
+'Y' = D3
+'Z' = D4
+'a' = D5
+'b' = D6
+'c' = D7
+'d' = D8
+'e' = D9
+'f' = DA
+'g' = DB
+'h' = DC
+'i' = DD
+'j' = DE
+'k' = DF
+'l' = E0
+'m' = E1
+'n' = E2
+'o' = E3
+'p' = E4
+'q' = E5
+'r' = E6
+'s' = E7
+'t' = E8
+'u' = E9
+'v' = EA
+'w' = EB
+'x' = EC
+'y' = ED
+'z' = EE
+':' = F0
+'$' = FF
+
+@ Hiragana
+'あ' = 01
+'い' = 02
+'う' = 03
+'え' = 04
+'お' = 05
+'か' = 06
+'き' = 07
+'く' = 08
+'け' = 09
+'こ' = 0A
+'さ' = 0B
+'し' = 0C
+'す' = 0D
+'せ' = 0E
+'そ' = 0F
+'た' = 10
+'ち' = 11
+'つ' = 12
+'て' = 13
+'と' = 14
+'な' = 15
+'に' = 16
+'ぬ' = 17
+'ね' = 18
+'の' = 19
+'は' = 1A
+'ひ' = 1B
+'ふ' = 1C
+'へ' = 1D
+'ほ' = 1E
+'ま' = 1F
+'み' = 20
+'む' = 21
+'め' = 22
+'も' = 23
+'や' = 24
+'ゆ' = 25
+'よ' = 26
+'ら' = 27
+'り' = 28
+'る' = 29
+'れ' = 2A
+'ろ' = 2B
+'わ' = 2C
+'を' = 2D
+'ん' = 2E
+'ぁ' = 2F
+'ぃ' = 30
+'ぅ' = 31
+'ぇ' = 32
+'ぉ' = 33
+'ゃ' = 34
+'ゅ' = 35
+'ょ' = 36
+'が' = 37
+'ぎ' = 38
+'ぐ' = 39
+'げ' = 3A
+'ご' = 3B
+'ざ' = 3C
+'じ' = 3D
+'ず' = 3E
+'ぜ' = 3F
+'ぞ' = 40
+'だ' = 41
+'ぢ' = 42
+'づ' = 43
+'で' = 44
+'ど' = 45
+'ば' = 46
+'び' = 47
+'ぶ' = 48
+'べ' = 49
+'ぼ' = 4A
+'ぱ' = 4B
+'ぴ' = 4C
+'ぷ' = 4D
+'ぺ' = 4E
+'ぽ' = 4F
+'っ' = 50
+
+@ Katakana
+'ア' = 51
+'イ' = 52
+'ウ' = 53
+'エ' = 54
+'オ' = 55
+'カ' = 56
+'キ' = 57
+'ク' = 58
+'ケ' = 59
+'コ' = 5A
+'サ' = 5B
+'シ' = 5C
+'ス' = 5D
+'セ' = 5E
+'ソ' = 5F
+'タ' = 60
+'チ' = 61
+'ツ' = 62
+'テ' = 63
+'ト' = 64
+'ナ' = 65
+'ニ' = 66
+'ヌ' = 67
+'ネ' = 68
+'ノ' = 69
+'ハ' = 6A
+'ヒ' = 6B
+'フ' = 6C
+'ヘ' = 6D
+'ホ' = 6E
+'マ' = 6F
+'ミ' = 70
+'ム' = 71
+'メ' = 72
+'モ' = 73
+'ヤ' = 74
+'ユ' = 75
+'ヨ' = 76
+'ラ' = 77
+'リ' = 78
+'ル' = 79
+'レ' = 7A
+'ロ' = 7B
+'ワ' = 7C
+'ヲ' = 7D
+'ン' = 7E
+'ァ' = 7F
+'ィ' = 80
+'ゥ' = 81
+'ェ' = 82
+'ォ' = 83
+'ャ' = 84
+'ュ' = 85
+'ョ' = 86
+'ガ' = 87
+'ギ' = 88
+'グ' = 89
+'ゲ' = 8A
+'ゴ' = 8B
+'ザ' = 8C
+'ジ' = 8D
+'ズ' = 8E
+'ゼ' = 8F
+'ゾ' = 90
+'ダ' = 91
+'ヂ' = 92
+'ヅ' = 93
+'デ' = 94
+'ド' = 95
+'バ' = 96
+'ビ' = 97
+'ブ' = 98
+'ベ' = 99
+'ボ' = 9A
+'パ' = 9B
+'ピ' = 9C
+'プ' = 9D
+'ペ' = 9E
+'ポ' = 9F
+'ッ' = A0
+
+'ー' = AE
+
+PK = 53
+PKMN = 53 54
+POKEBLOCK = 55 56 57 58 59
+
+@ indicates the end of a town/city name (before " TOWN" or " CITY")
+NAME_END = FC 00
+
+@ string placeholders
+@ unknown (FD 00)
+PLAYER = FD 01
+STR_VAR_1 = FD 02
+STR_VAR_2 = FD 03
+STR_VAR_3 = FD 04
+@ unknown (FD 05)
+RIVAL = FD 06
+@ version-dependent strings
+VERSION = FD 07 @ "RUBY" / "SAPPHIRE"
+EVIL_TEAM = FD 08 @ "MAGMA" / "AQUA"
+GOOD_TEAM = FD 09 @ "AQUA" / "MAGMA"
+EVIL_TEAM_LEADER = FD 0A @ "MAXIE" / "ARCHIE"
+GOOD_TEAM_LEADER = FD 0B @ "ARCHIE" / "MAXIE"
+CUR_VERSION_MASCOT = FD 0C @ "GROUDON" / "KYOGRE"
+OPP_VERSION_MASCOT = FD 0D @ "KYOGRE" / "GROUDON"
+
+'\l' = FA @ scroll up window text
+'\p' = FB @ new paragraph
+'\n' = FE @ new line
diff --git a/data/data1.s b/data/data1.s
index ebfa367a5..6428dbac6 100644
--- a/data/data1.s
+++ b/data/data1.s
@@ -2,7 +2,6 @@
.include "asm/macros.s"
.include "constants/constants.s"
- .include "asm/charmap.s"
.section script_data, "aw", %progbits
diff --git a/data/data2.s b/data/data2.s
index cdddccb8f..d0a3611c5 100644
--- a/data/data2.s
+++ b/data/data2.s
@@ -2,7 +2,6 @@
.include "asm/macros.s"
.include "constants/constants.s"
- .include "asm/charmap.s"
.section .rodata
diff --git a/fix_local_labels.pl b/fix_local_labels.pl
deleted file mode 100644
index d3258c766..000000000
--- a/fix_local_labels.pl
+++ /dev/null
@@ -1,28 +0,0 @@
-use strict;
-use warnings;
-
-open(IN_FILE, $ARGV[0]);
-open(OUT_FILE, ">", $ARGV[1]);
-
-my @labels = ();
-
-while (<IN_FILE>) {
- if ($_ =~ /^\.(.+):/) {
- push(@labels, $1);
- }
-}
-
-seek IN_FILE, 0, 0;
-
-while (<IN_FILE>) {
- for (my $i = 0; $i < scalar(@labels); $i++) {
- my $find = quotemeta '.' . $labels[$i];
- my $replace = '$' . $labels[$i];
- $_ =~ s/$find/$replace/;
- }
-
- print OUT_FILE $_;
-}
-
-close(IN_FILE);
-close(OUT_FILE);
diff --git a/tools/preproc/.gitignore b/tools/preproc/.gitignore
new file mode 100644
index 000000000..eb3470879
--- /dev/null
+++ b/tools/preproc/.gitignore
@@ -0,0 +1 @@
+preproc
diff --git a/tools/preproc/LICENSE b/tools/preproc/LICENSE
new file mode 100644
index 000000000..534d15349
--- /dev/null
+++ b/tools/preproc/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2016 YamaArashi
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/tools/preproc/Makefile b/tools/preproc/Makefile
new file mode 100644
index 000000000..10f930ebd
--- /dev/null
+++ b/tools/preproc/Makefile
@@ -0,0 +1,15 @@
+# C++ compiler used to build the tool.
+CXX := g++
+
+# The sources are built as C++14 with optimization enabled.
+CXXFLAGS := -std=c++14 -O2
+
+# Every translation unit of the tool; all are compiled in one invocation below.
+SRCS := asm_file.cpp charmap.cpp preproc.cpp utf8.cpp
+
+# Headers are listed only as prerequisites, so that changing one triggers a rebuild.
+HEADERS := asm_file.h char_util.h charmap.h preproc.h utf8.h
+
+.PHONY: clean
+
+# Default target: compile and link all sources straight into the executable.
+preproc: $(SRCS) $(HEADERS)
+ $(CXX) $(CXXFLAGS) $(SRCS) -o $@
+
+# Remove the built executable.
+# NOTE(review): preproc.exe is presumably the name produced by Windows toolchains - confirm.
+clean:
+ $(RM) preproc preproc.exe
diff --git a/tools/preproc/asm_file.cpp b/tools/preproc/asm_file.cpp
new file mode 100644
index 000000000..0b42d9ab2
--- /dev/null
+++ b/tools/preproc/asm_file.cpp
@@ -0,0 +1,563 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdio>
+#include <cstdarg>
+#include "preproc.h"
+#include "asm_file.h"
+#include "char_util.h"
+#include "utf8.h"
+
+// Loads the whole file into a null-terminated buffer, resets the read
+// position to the first line, and blanks out the file's comments.
+// Failing to open, measure, or read the file is reported with FATAL_ERROR.
+// An empty file is valid and yields a buffer holding only the terminator.
+AsmFile::AsmFile(std::string filename) : m_filename(filename)
+{
+    FILE *fp = std::fopen(filename.c_str(), "rb");
+
+    if (fp == NULL)
+        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());
+
+    std::fseek(fp, 0, SEEK_END);
+
+    m_size = std::ftell(fp);
+
+    // ftell signals failure with -1, which must not be used as a buffer size.
+    if (m_size < 0)
+        FATAL_ERROR("Failed to get the size of \"%s\".\n", filename.c_str());
+
+    m_buffer = new char[m_size + 1];
+
+    std::rewind(fp);
+
+    // fread returns the number of complete items read. With an item size of
+    // zero that is always 0, so an empty file must skip the read instead of
+    // being treated as a read failure.
+    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
+        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());
+
+    m_buffer[m_size] = 0;
+
+    std::fclose(fp);
+
+    m_pos = 0;
+    m_lineNum = 1;
+    m_lineStart = 0;
+
+    RemoveComments();
+}
+
+// Move constructor: takes over the other object's buffer, read position and
+// file name. The source is left with a null buffer so that destroying it
+// does not free the memory that now belongs to this object.
+AsmFile::AsmFile(AsmFile&& other)
+    : m_buffer(other.m_buffer),
+      m_pos(other.m_pos),
+      m_size(other.m_size),
+      m_lineNum(other.m_lineNum),
+      m_lineStart(other.m_lineStart),
+      m_filename(std::move(other.m_filename))
+{
+    other.m_buffer = nullptr;
+}
+
+// Frees the file buffer. A moved-from object has a null buffer, and
+// delete[] on a null pointer does nothing, so destroying it is safe.
+AsmFile::~AsmFile()
+{
+ delete[] m_buffer;
+}
+
+// Removes comments to simplify further processing.
+// Two forms are overwritten with spaces, in place: "@" up to the end of the
+// line, and "/* ... */" blocks. Newline characters are never overwritten.
+// Text inside single- or double-quoted strings is left as it is.
+// It stops upon encountering a null character,
+// which may or may not be the end of file marker.
+// If it's not, the error will be caught later.
+void AsmFile::RemoveComments()
+{
+ long pos = 0;
+ bool inString = false;
+ char stringChar; // Quote character that opened the current string; only read while inString is true.
+
+ for (;;)
+ {
+ if (m_buffer[pos] == 0)
+ return;
+
+ if (inString)
+ {
+ // Inside a string: a backslash followed by the quote character is an
+ // escaped quote and does not close the string.
+ // NOTE(review): a backslash pair directly before the closing quote is
+ // also matched here (second backslash + quote), leaving the string open - confirm whether that input can occur.
+ if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar)
+ {
+ pos += 2;
+ }
+ else
+ {
+ if (m_buffer[pos] == stringChar)
+ inString = false;
+ pos++;
+ }
+ }
+ // "@" starts a line comment unless the previous character is a backslash.
+ else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\'))
+ {
+ while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
+ m_buffer[pos++] = ' ';
+ }
+ else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*')
+ {
+ // Blank out the opening "/*".
+ m_buffer[pos++] = ' ';
+ m_buffer[pos++] = ' ';
+
+ // Quotes are tracked inside the block comment as well: while a quoted
+ // string is open, "*/" is not checked for and so does not end the comment.
+ bool inCommentString = false;
+ char commentStringChar;
+
+ for (;;)
+ {
+ // An unterminated block comment simply ends the scan.
+ if (m_buffer[pos] == 0)
+ return;
+
+ if (inCommentString)
+ {
+ if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == commentStringChar)
+ {
+ m_buffer[pos++] = ' ';
+ m_buffer[pos++] = ' ';
+ }
+ else
+ {
+ if (m_buffer[pos] == commentStringChar)
+ inCommentString = false;
+ if (m_buffer[pos] != '\n')
+ m_buffer[pos] = ' ';
+ pos++;
+ }
+ }
+ else
+ {
+ if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/')
+ {
+ // Blank out the closing "*/" and return to normal scanning.
+ m_buffer[pos++] = ' ';
+ m_buffer[pos++] = ' ';
+ break;
+ }
+ else
+ {
+ if (m_buffer[pos] == '"' || m_buffer[pos] == '\'')
+ {
+ commentStringChar = m_buffer[pos];
+ inCommentString = true;
+ }
+ if (m_buffer[pos] != '\n')
+ m_buffer[pos] = ' ';
+ pos++;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Ordinary text: remember an opening quote so the string body is skipped.
+ if (m_buffer[pos] == '"' || m_buffer[pos] == '\'')
+ {
+ stringChar = m_buffer[pos];
+ inString = true;
+ }
+ pos++;
+ }
+ }
+}
+
+// Tests whether the text at the current position starts with the given
+// directive name. On a full match the read position moves past the name and
+// true is returned; otherwise nothing is consumed and false is returned.
+bool AsmFile::CheckForDirective(std::string name)
+{
+    const long length = static_cast<long>(name.length());
+    const long remaining = m_size - m_pos;
+
+    // Count how many leading characters agree, never reading past the file.
+    long matched = 0;
+
+    while (matched < length
+        && matched < remaining
+        && name[matched] == m_buffer[m_pos + matched])
+    {
+        matched++;
+    }
+
+    if (matched != length)
+        return false;
+
+    m_pos += length;
+
+    return true;
+}
+
+// Skips leading whitespace, then tries each directive this tool handles.
+// A recognized directive is consumed and identified in the return value;
+// anything else yields Directive::Unknown.
+Directive AsmFile::GetDirective()
+{
+    SkipWhitespace();
+
+    if (CheckForDirective(".include"))
+        return Directive::Include;
+
+    if (CheckForDirective(".string"))
+        return Directive::String;
+
+    return Directive::Unknown;
+}
+
+// Advances the read position past any run of spaces and tabs.
+// Newlines are not whitespace here, so this never leaves the current line.
+void AsmFile::SkipWhitespace()
+{
+    for (char c = m_buffer[m_pos]; c == ' ' || c == '\t'; c = m_buffer[++m_pos])
+    {
+    }
+}
+
+// Reads the quoted path operand of an include directive and returns it
+// without the surrounding quotes. Only printable ASCII is accepted, escape
+// sequences are rejected, and the path may be at most kMaxPath characters.
+// Any violation is reported with RaiseError, which terminates the program.
+// The rest of the line after the closing quote must be empty.
+std::string AsmFile::ReadPath()
+{
+    SkipWhitespace();
+
+    if (m_buffer[m_pos] != '"')
+        RaiseError("expected file path");
+
+    m_pos++;
+
+    int length = 0;
+    long startPos = m_pos;
+
+    while (m_buffer[m_pos] != '"')
+    {
+        unsigned char c = m_buffer[m_pos++];
+
+        if (c == 0)
+        {
+            // m_pos has already moved past the null, so the null was the
+            // buffer terminator (real end of file) only if m_pos is now
+            // beyond m_size. A null inside the file data, including in its
+            // last byte, is reported as a stray null character.
+            if (m_pos > m_size)
+                RaiseError("unexpected EOF in include string");
+            else
+                RaiseError("unexpected null character in include string");
+        }
+
+        if (!IsAsciiPrintable(c))
+            RaiseError("unexpected character '\\x%02X' in include string", c);
+
+        // Don't bother allowing any escape sequences.
+        if (c == '\\')
+        {
+            c = m_buffer[m_pos];
+            RaiseError("unexpected escape '\\%c' in include string", c);
+        }
+
+        length++;
+
+        if (length > kMaxPath)
+            RaiseError("path is too long");
+    }
+
+    m_pos++; // Go past the right quote.
+
+    ExpectEmptyRestOfLine();
+
+    return std::string(&m_buffer[startPos], length);
+}
+
+// Reads one charmap character or escape sequence at the current position
+// and returns the byte sequence the charmap assigns to it.
+// A backslash introduces an escape: \" and \\ stand for the double quote
+// and backslash characters themselves, and any other escape is looked up
+// with the charmap's Escape(). Everything else is decoded as one UTF-8
+// character and looked up with Char().
+// The read position always ends up just past the text that was consumed.
+// Problems (EOF, stray null, non-printable ASCII, invalid UTF-8, a missing
+// mapping) are reported with RaiseError, which terminates the program.
+std::string AsmFile::ReadCharOrEscape()
+{
+    std::string sequence;
+
+    bool isEscape = (m_buffer[m_pos] == '\\');
+
+    if (isEscape)
+    {
+        m_pos++;
+
+        if (m_buffer[m_pos] == '"')
+        {
+            sequence = g_charmap->Char('"');
+
+            if (sequence.length() == 0)
+                RaiseError("no mapping exists for double quote");
+
+            // Consume the escaped quote. Otherwise the caller would see it
+            // as the closing quote and end the string early.
+            m_pos++;
+
+            return sequence;
+        }
+        else if (m_buffer[m_pos] == '\\')
+        {
+            sequence = g_charmap->Char('\\');
+
+            if (sequence.length() == 0)
+                RaiseError("no mapping exists for backslash");
+
+            // Consume the escaped backslash. Otherwise the next call would
+            // treat it as the start of another escape sequence.
+            m_pos++;
+
+            return sequence;
+        }
+    }
+
+    unsigned char c = m_buffer[m_pos];
+
+    if (c == 0)
+    {
+        if (m_pos >= m_size)
+            RaiseError("unexpected EOF in UTF-8 string");
+        else
+            RaiseError("unexpected null character in UTF-8 string");
+    }
+
+    if (IsAscii(c) && !IsAsciiPrintable(c))
+        RaiseError("unexpected character U+%X in UTF-8 string", c);
+
+    UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
+    m_pos += unicodeChar.encodingLength;
+    std::int32_t code = unicodeChar.code;
+
+    if (code == -1)
+        RaiseError("invalid encoding in UTF-8 string");
+
+    if (isEscape && code >= 128)
+        RaiseError("escapes using non-ASCII characters are invalid");
+
+    sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code);
+
+    if (sequence.length() == 0)
+    {
+        if (isEscape)
+            RaiseError("unknown escape '\\%c'", code);
+        else
+            RaiseError("unknown character U+%X", code);
+    }
+
+    return sequence;
+}
+
+// Reads a "{FOO}" charmap constant and returns the byte sequence the
+// charmap assigns to FOO. The read position must be on the left curly
+// bracket on entry and ends up just past the right one.
+// A malformed or unknown constant is reported with RaiseError.
+std::string AsmFile::ReadConstant()
+{
+    // Step over the left curly bracket; the name starts right after it.
+    const long nameStart = ++m_pos;
+
+    for (; IsIdentifierChar(m_buffer[m_pos]); m_pos++)
+    {
+    }
+
+    const char terminator = m_buffer[m_pos];
+
+    if (terminator != '}')
+    {
+        if (terminator != 0)
+            RaiseError("unexpected character in identifier");
+
+        if (m_pos >= m_size)
+            RaiseError("unexpected EOF in identifier");
+        else
+            RaiseError("unexpected null character in identifier");
+    }
+
+    const std::string name(&m_buffer[nameStart], m_pos - nameStart);
+    std::string sequence = g_charmap->Constant(name);
+
+    if (sequence.empty())
+        RaiseError("unknown constant '%s'", name.c_str());
+
+    m_pos++; // Step over the right curly bracket.
+
+    return sequence;
+}
+
+// Reads a double-quoted charmap string and writes its mapped bytes to s.
+// "{FOO}" constants and individual characters/escapes are each translated
+// through the charmap. An optional ", N" after the closing quote pads the
+// output with zero bytes up to N bytes. Returns the number of bytes written.
+// At most kMaxStringLength bytes are written; exceeding that is an error.
+int AsmFile::ReadString(unsigned char* s)
+{
+    SkipWhitespace();
+
+    if (m_buffer[m_pos] != '"')
+        RaiseError("expected UTF-8 string literal");
+
+    int written = 0;
+
+    // Start just past the left quote; each reader call advances m_pos itself.
+    for (m_pos++; m_buffer[m_pos] != '"'; )
+    {
+        const bool atConstant = (m_buffer[m_pos] == '{');
+        const std::string mapped = atConstant ? ReadConstant() : ReadCharOrEscape();
+
+        for (std::string::size_type i = 0; i < mapped.length(); i++)
+        {
+            if (written == kMaxStringLength)
+                RaiseError("mapped string longer than %d bytes", written);
+
+            s[written] = mapped[i];
+            written++;
+        }
+    }
+
+    m_pos++; // Step over the right quote.
+
+    SkipWhitespace();
+
+    if (ConsumeComma())
+    {
+        SkipWhitespace();
+
+        const int padLength = ReadPadLength();
+
+        for (; written < padLength; written++)
+            s[written] = 0;
+    }
+
+    ExpectEmptyRestOfLine();
+
+    return written;
+}
+
+// Reports whether the current character is a comma, stepping over it if so.
+bool AsmFile::ConsumeComma()
+{
+    const bool found = (m_buffer[m_pos] == ',');
+
+    if (found)
+        m_pos++;
+
+    return found;
+}
+
+// Returns the numeric value of a digit character in the given radix.
+// Letters A-F (either case) count as 10-15. Returns -1 if the character is
+// not a digit at all or if its value is not below the radix.
+static int ConvertDigit(char c, int radix)
+{
+    int value = -1;
+
+    if ('0' <= c && c <= '9')
+        value = c - '0';
+    else if ('A' <= c && c <= 'F')
+        value = c - 'A' + 10;
+    else if ('a' <= c && c <= 'f')
+        value = c - 'a' + 10;
+
+    if (value < 0 || value >= radix)
+        return -1;
+
+    return value;
+}
+
+// Reads the pad length for a charmap string.
+// Accepts a decimal integer, or a hexadecimal one introduced by "0x".
+// Returns the value and leaves the read position on the first character
+// that is not part of the number. Raises an error if no digits are present
+// or if the value exceeds kMaxStringLength.
+int AsmFile::ReadPadLength()
+{
+    if (!IsAsciiDigit(m_buffer[m_pos]))
+        RaiseError("expected integer");
+
+    int radix = 10;
+
+    if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x')
+    {
+        radix = 16;
+        m_pos += 2;
+
+        // A bare "0x" would otherwise be read as 0 without complaint;
+        // require at least one hex digit after the prefix.
+        if (ConvertDigit(m_buffer[m_pos], radix) == -1)
+            RaiseError("expected integer");
+    }
+
+    int n = 0;
+    int digit;
+
+    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
+    {
+        n = n * radix + digit;
+
+        // Checking on every digit also keeps n from overflowing.
+        if (n > kMaxStringLength)
+            RaiseError("pad length greater than maximum length (%d)", kMaxStringLength);
+
+        m_pos++;
+    }
+
+    return n;
+}
+
+// Prints the current line (from its start, not from the read position) to
+// standard output and moves to the start of the next line.
+// A final line without a newline is still printed, with a warning.
+// A null character before the end of the file is an error.
+void AsmFile::OutputLine()
+{
+    // Advance to the end of the current line, or to the next null.
+    for (char c = m_buffer[m_pos]; c != '\n' && c != 0; c = m_buffer[++m_pos])
+    {
+    }
+
+    if (m_buffer[m_pos] == '\n')
+    {
+        // Terminate the line temporarily so that puts prints only this line.
+        m_buffer[m_pos] = 0;
+        puts(&m_buffer[m_lineStart]);
+        m_buffer[m_pos] = '\n';
+
+        m_lineStart = ++m_pos;
+        m_lineNum++;
+        return;
+    }
+
+    // We stopped on a null. Before the end of the buffer it is stray data.
+    if (m_pos < m_size)
+        RaiseError("unexpected null character");
+
+    RaiseWarning("file doesn't end with newline");
+    puts(&m_buffer[m_lineStart]);
+}
+
+// Requires that only spaces and tabs remain on the current line.
+// On a newline the read position moves to the next line. At end of file a
+// warning about the missing newline is issued. Anything else is an error.
+void AsmFile::ExpectEmptyRestOfLine()
+{
+    SkipWhitespace();
+
+    switch (m_buffer[m_pos])
+    {
+    case 0:
+        if (m_pos >= m_size)
+            RaiseWarning("file doesn't end with newline");
+        else
+            RaiseError("unexpected null character");
+        break;
+
+    case '\n':
+        m_pos++;
+        m_lineStart = m_pos;
+        m_lineNum++;
+        break;
+
+    case '\r':
+        RaiseError("only Unix-style LF newlines are supported");
+        break;
+
+    default:
+        RaiseError("junk at end of line");
+        break;
+    }
+}
+
+// Returns true once the read position has reached or passed the file size.
+bool AsmFile::IsAtEnd()
+{
+    const bool hasMore = (m_pos < m_size);
+
+    return !hasMore;
+}
+
+// Prints a "# <line> "<file>"" marker for the current position to standard
+// output, which sets gas's logical file name and line number.
+void AsmFile::OutputLocation()
+{
+    const char* const filename = m_filename.c_str();
+
+    std::printf("# %ld \"%s\"\n", m_lineNum, filename);
+}
+
+// Formats a message and writes it to standard error as
+// "<file>:<line>: <type>: <message>". Messages that do not fit in the
+// 1024-byte buffer are truncated.
+void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
+{
+    char message[1024];
+
+    std::vsnprintf(message, sizeof(message), format, args);
+
+    std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, message);
+}
+
+// Collects the variadic arguments of the enclosing function and passes them
+// to ReportDiagnostic with the given diagnostic type. It must be expanded
+// inside a variadic member function whose last named parameter is "format".
+// The do/while (0) wrapper makes the expansion behave as a single statement.
+#define DO_REPORT(type) \
+do \
+{ \
+ std::va_list args; \
+ va_start(args, format); \
+ ReportDiagnostic(type, format, args); \
+ va_end(args); \
+} while (0)
+
+// Prints a printf-style message as an "error" diagnostic for the current
+// file and line, then exits the program with status 1. Never returns.
+void AsmFile::RaiseError(const char* format, ...)
+{
+    std::va_list args;
+
+    va_start(args, format);
+    ReportDiagnostic("error", format, args);
+    va_end(args);
+
+    exit(1);
+}
+
+// Prints a printf-style message as a "warning" diagnostic for the current
+// file and line. Processing continues afterwards.
+void AsmFile::RaiseWarning(const char* format, ...)
+{
+    std::va_list args;
+
+    va_start(args, format);
+    ReportDiagnostic("warning", format, args);
+    va_end(args);
+}
diff --git a/tools/preproc/asm_file.h b/tools/preproc/asm_file.h
new file mode 100644
index 000000000..1c137d7d3
--- /dev/null
+++ b/tools/preproc/asm_file.h
@@ -0,0 +1,71 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef ASM_FILE_H
+#define ASM_FILE_H
+
+#include <cstdarg>
+#include <cstdint>
+#include <string>
+#include "preproc.h"
+
+// Result of AsmFile::GetDirective(): which directive, if any, starts at the current position.
+enum class Directive
+{
+ Include, // ".include" was found and consumed.
+ String, // ".string" was found and consumed.
+ Unknown // Neither of the above was found; nothing was consumed.
+};
+
+// An assembly source file held in memory, with a cursor for reading it line by line.
+// The constructor loads the whole file and blanks out its comments.
+// Objects can be moved but not copied, since each one owns its buffer.
+class AsmFile
+{
+public:
+ AsmFile(std::string filename);
+ AsmFile(AsmFile&& other);
+ AsmFile(const AsmFile&) = delete;
+ ~AsmFile();
+ Directive GetDirective(); // Skips whitespace, then consumes ".include" or ".string" if present.
+ std::string ReadPath(); // Reads the quoted path that follows an include directive.
+ int ReadString(unsigned char* s); // Writes the charmap-encoded string to s; returns its length in bytes.
+ bool IsAtEnd(); // True once the cursor has reached the end of the file.
+ void OutputLine(); // Prints the current line to stdout and moves to the next one.
+ void OutputLocation(); // Prints a file/line marker for the assembler to stdout.
+
+private:
+ char* m_buffer; // File contents followed by a terminating null; owned by this object.
+ long m_pos; // Offset of the cursor within m_buffer.
+ long m_size; // Size of the file in bytes, not counting the terminating null.
+ long m_lineNum; // Line number of the cursor, starting at 1.
+ long m_lineStart; // Offset in m_buffer where the current line begins.
+ std::string m_filename; // Name the file was opened with; used in diagnostics and location markers.
+
+ bool ConsumeComma();
+ int ReadPadLength();
+ void RemoveComments();
+ bool CheckForDirective(std::string name);
+ std::string ReadCharOrEscape();
+ std::string ReadConstant();
+ void SkipWhitespace();
+ void ExpectEmptyRestOfLine();
+ void ReportDiagnostic(const char* type, const char* format, std::va_list args);
+ void RaiseError(const char* format, ...); // Reports the error and exits the program.
+ void RaiseWarning(const char *format, ...);
+};
+
+#endif // ASM_FILE_H
diff --git a/tools/preproc/char_util.h b/tools/preproc/char_util.h
new file mode 100644
index 000000000..05f9a1dd4
--- /dev/null
+++ b/tools/preproc/char_util.h
@@ -0,0 +1,65 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef CHAR_UTIL_H
+#define CHAR_UTIL_H
+
+#include <cstdint>
+#include <cassert>
+
+// Returns whether the byte is in the 7-bit ASCII range (0..127).
+inline bool IsAscii(unsigned char c)
+{
+    // ASCII bytes never have the top bit set.
+    return (c & 0x80) == 0;
+}
+
+// Returns whether the byte is an ASCII letter (A-Z or a-z).
+inline bool IsAsciiAlpha(unsigned char c)
+{
+    if (c >= 'a' && c <= 'z')
+        return true;
+
+    return c >= 'A' && c <= 'Z';
+}
+
+// Returns whether the byte is an ASCII decimal digit (0-9).
+inline bool IsAsciiDigit(unsigned char c)
+{
+    const bool belowRange = (c < '0');
+    const bool aboveRange = (c > '9');
+
+    return !(belowRange || aboveRange);
+}
+
+// Returns whether the byte is an ASCII hexadecimal digit (0-9, a-f or A-F).
+inline bool IsAsciiHexDigit(unsigned char c)
+{
+    if (c >= '0' && c <= '9')
+        return true;
+
+    if (c >= 'A' && c <= 'F')
+        return true;
+
+    return c >= 'a' && c <= 'f';
+}
+
+// Returns whether the byte is an ASCII letter or an ASCII decimal digit.
+inline bool IsAsciiAlphanum(unsigned char c)
+{
+    if (IsAsciiAlpha(c))
+        return true;
+
+    return IsAsciiDigit(c);
+}
+
+// Returns whether the byte is a printable ASCII character (space through tilde).
+inline bool IsAsciiPrintable(unsigned char c)
+{
+    const bool isControl = (c < ' ');
+    const bool isPastTilde = (c > '~');
+
+    return !isControl && !isPastTilde;
+}
+
+// Identifier characters for "{FOO}" constants in strings: ASCII letters, digits and underscore.
+inline bool IsIdentifierChar(unsigned char c)
+{
+    if (c == '_')
+        return true;
+
+    return IsAsciiAlphanum(c);
+}
+
+#endif // CHAR_UTIL_H
diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp
new file mode 100644
index 000000000..de693eda5
--- /dev/null
+++ b/tools/preproc/charmap.cpp
@@ -0,0 +1,397 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdio>
+#include <cstdint>
+#include <cstdarg>
+#include "preproc.h"
+#include "charmap.h"
+#include "char_util.h"
+#include "utf8.h"
+
+// Kind of left-hand side parsed by CharmapReader::ReadLhs().
+enum LhsType
+{
+ Char, // a quoted character literal (also used for the escaped quote '\'')
+ Escape, // a quoted backslash escape such as '\n'
+ Constant, // a bare identifier
+ None // end of the charmap file; there is no further entry
+};
+
+// Left-hand side of one charmap entry, as returned by CharmapReader::ReadLhs().
+struct Lhs
+{
+ LhsType type;
+ std::string name; // identifier text; only set by ReadLhs() for LhsType::Constant
+ std::int32_t code; // decoded code point; only set by ReadLhs() for quoted literals
+};
+
+// Parser for a charmap file. The whole file is held in memory and consumed entry by
+// entry in the order ReadLhs(), ExpectEqualsSign(), ReadSequence(), ExpectEmptyRestOfLine()
+// (see the Charmap constructor).
+class CharmapReader
+{
+public:
+ CharmapReader(std::string filename);
+ // Non-copyable: the object owns m_buffer and frees it in the destructor.
+ CharmapReader(const CharmapReader&) = delete;
+ ~CharmapReader();
+ Lhs ReadLhs();
+ void ExpectEqualsSign();
+ std::string ReadSequence();
+ void ExpectEmptyRestOfLine();
+
+private:
+ char* m_buffer; // file contents followed by a terminating 0 byte
+ long m_pos; // index of the next unread byte in m_buffer
+ long m_size; // number of bytes read from the file (excludes the terminator)
+ long m_lineNum; // 1-based line number, used in error messages
+ std::string m_filename; // used in error messages
+
+ void RaiseError(const char* format, ...);
+ void RemoveComments();
+ std::string ReadConstant();
+ void SkipWhitespace();
+};
+
+// Loads the whole charmap file into a 0-terminated buffer and blanks out its comments.
+// Terminates the program via FATAL_ERROR if the file cannot be opened, measured or read.
+CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
+{
+    FILE *fp = std::fopen(filename.c_str(), "rb");
+
+    if (fp == NULL)
+        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());
+
+    // Measure the file by seeking to its end. Either call can fail; a negative size
+    // would otherwise be used to allocate and index the buffer.
+    if (std::fseek(fp, 0, SEEK_END) != 0)
+        FATAL_ERROR("Failed to get size of \"%s\".\n", filename.c_str());
+
+    m_size = std::ftell(fp);
+
+    if (m_size < 0)
+        FATAL_ERROR("Failed to get size of \"%s\".\n", filename.c_str());
+
+    // One extra byte for the terminating 0 that the parsing code relies on.
+    m_buffer = new char[m_size + 1];
+
+    std::rewind(fp);
+
+    // fread() with an element size of 0 returns 0, so an empty file must not be
+    // mistaken for a read failure.
+    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
+        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());
+
+    m_buffer[m_size] = 0;
+
+    std::fclose(fp);
+
+    m_pos = 0;
+    m_lineNum = 1;
+
+    RemoveComments();
+}
+
+// Releases the file buffer allocated with new[] in the constructor.
+CharmapReader::~CharmapReader()
+{
+ delete[] m_buffer;
+}
+
+// Reads the left-hand side of the next charmap entry, skipping blank lines first.
+//
+// Accepted forms:
+//   'X'    one UTF-8 encoded character   -> LhsType::Char, code = code point
+//   '\X'   backslash escape, X is ASCII  -> LhsType::Escape, code = X
+//          ('\'' and '\\' stand for the quote and the backslash themselves
+//          and therefore yield LhsType::Char)
+//   NAME   an identifier                 -> LhsType::Constant, name = NAME
+//
+// Returns LhsType::None once the end of the buffer has been reached.
+// Malformed input is reported through RaiseError().
+Lhs CharmapReader::ReadLhs()
+{
+    Lhs lhs;
+
+    // Give every field a defined value; not all branches below set all of them.
+    lhs.type = LhsType::None;
+    lhs.code = 0;
+
+    // Skip blank lines, keeping the line counter in sync.
+    for (;;)
+    {
+        SkipWhitespace();
+
+        if (m_buffer[m_pos] == '\n')
+        {
+            m_pos++;
+            m_lineNum++;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    if (m_buffer[m_pos] == '\'')
+    {
+        m_pos++;
+
+        bool isEscape = (m_buffer[m_pos] == '\\');
+
+        if (isEscape)
+        {
+            m_pos++;
+        }
+
+        unsigned char c = m_buffer[m_pos];
+
+        if (c == 0)
+        {
+            // A 0 byte is either the buffer terminator or a stray byte inside the file.
+            if (m_pos >= m_size)
+                RaiseError("unexpected EOF in UTF-8 character literal");
+            else
+                RaiseError("unexpected null character in UTF-8 character literal");
+        }
+
+        if (IsAscii(c) && !IsAsciiPrintable(c))
+            RaiseError("unexpected character U+%X in UTF-8 character literal", c);
+
+        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
+        std::int32_t code = unicodeChar.code;
+
+        if (code == -1)
+            RaiseError("invalid encoding in UTF-8 character literal");
+
+        m_pos += unicodeChar.encodingLength;
+
+        if (m_buffer[m_pos] != '\'')
+            RaiseError("unterminated character literal");
+
+        m_pos++;
+
+        lhs.code = code;
+
+        if (isEscape)
+        {
+            if (code >= 128)
+                RaiseError("escapes using non-ASCII characters are invalid");
+
+            switch (code)
+            {
+            case '\'':
+            case '\\':
+                // An escaped quote or backslash is the plain character itself.
+                // (The backslash case used to fall through into the error below.)
+                lhs.type = LhsType::Char;
+                break;
+            case '"':
+                RaiseError("cannot escape double quote");
+                break;
+            default:
+                lhs.type = LhsType::Escape;
+                break;
+            }
+        }
+        else
+        {
+            // The decoded character was the closing quote, i.e. the literal was ''.
+            if (code == '\'')
+                RaiseError("empty character literal");
+
+            lhs.type = LhsType::Char;
+        }
+    }
+    else if (IsIdentifierChar(m_buffer[m_pos]))
+    {
+        lhs.type = LhsType::Constant;
+        lhs.name = ReadConstant();
+    }
+    else if (m_buffer[m_pos] == '\r')
+    {
+        RaiseError("only Unix-style LF newlines are supported");
+    }
+    else if (m_buffer[m_pos] == 0)
+    {
+        // Only the terminator at m_size marks the real end of the file.
+        if (m_pos < m_size)
+            RaiseError("unexpected null character");
+        lhs.type = LhsType::None;
+    }
+    else
+    {
+        RaiseError("junk at start of line");
+    }
+
+    return lhs;
+}
+
+// Skips blanks and consumes the '=' that separates an entry's left-hand side from its bytes.
+// Reports an error if the next character is anything else.
+void CharmapReader::ExpectEqualsSign()
+{
+    SkipWhitespace();
+
+    const char next = m_buffer[m_pos];
+
+    if (next != '=')
+        RaiseError("expected equals sign");
+
+    ++m_pos;
+}
+
+// Converts one hexadecimal digit character to its value (0-15).
+// Any other character yields 0.
+static unsigned int ConvertHexDigit(char c)
+{
+    if (c >= '0' && c <= '9')
+        return c - '0';
+
+    if (c >= 'A' && c <= 'F')
+        return 10 + c - 'A';
+
+    if (c >= 'a' && c <= 'f')
+        return 10 + c - 'a';
+
+    return 0;
+}
+
+// Reads the right-hand side of an entry: one or more bytes, each written as exactly two
+// hex digits, optionally separated by blanks. Returns the bytes as a string.
+// The input is scanned twice: once to validate and count, once to convert.
+std::string CharmapReader::ReadSequence()
+{
+    SkipWhitespace();
+
+    const long sequenceStart = m_pos;
+    unsigned int byteCount = 0;
+
+    // Pass 1: validate the digit pairs and count them.
+    for (;;)
+    {
+        // The second digit is only inspected when the first one is a hex digit.
+        if (!IsAsciiHexDigit(m_buffer[m_pos]) || !IsAsciiHexDigit(m_buffer[m_pos + 1]))
+            break;
+
+        m_pos += 2;
+        ++byteCount;
+
+        if (byteCount > kMaxCharmapSequenceLength)
+            RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);
+
+        SkipWhitespace();
+    }
+
+    // A lone hex digit is left over when a byte was written with an odd number of digits.
+    if (IsAsciiHexDigit(m_buffer[m_pos]))
+        RaiseError("each byte must have 2 hex digits");
+
+    if (byteCount == 0)
+        RaiseError("expected byte sequence");
+
+    std::string bytes;
+    bytes.reserve(byteCount);
+
+    // Pass 2: rewind and convert each validated pair.
+    m_pos = sequenceStart;
+
+    for (unsigned int remaining = byteCount; remaining > 0; --remaining)
+    {
+        const unsigned int high = ConvertHexDigit(m_buffer[m_pos]);
+        const unsigned int low = ConvertHexDigit(m_buffer[m_pos + 1]);
+        const unsigned char value = high * 16 + low;
+
+        bytes.push_back(static_cast<char>(value));
+
+        m_pos += 2;
+        SkipWhitespace();
+    }
+
+    return bytes;
+}
+
+// Verifies that only blanks remain on the current line and moves to the next line.
+// Reaching the end of the buffer is accepted as well.
+void CharmapReader::ExpectEmptyRestOfLine()
+{
+    SkipWhitespace();
+
+    switch (m_buffer[m_pos])
+    {
+    case 0:
+        // Only the terminator at m_size marks the real end of the file.
+        if (m_pos < m_size)
+            RaiseError("unexpected null character");
+        break;
+    case '\n':
+        m_pos++;
+        m_lineNum++;
+        break;
+    case '\r':
+        RaiseError("only Unix-style LF newlines are supported");
+        break;
+    default:
+        RaiseError("junk at end of line");
+        break;
+    }
+}
+
+void CharmapReader::RaiseError(const char* format, ...)
+{
+ const int bufferSize = 1024;
+ char buffer[bufferSize];
+
+ std::va_list args;
+ va_start(args, format);
+ std::vsnprintf(buffer, bufferSize, format, args);
+ va_end(args);
+
+ std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);
+}
+
+// Overwrites every comment in the buffer with spaces. A comment starts at '@' and runs
+// to the end of the line; the newline itself is kept so line numbers stay correct.
+// An '@' inside a single-quoted literal is not a comment.
+void CharmapReader::RemoveComments()
+{
+    long pos = 0;
+    bool inString = false;
+
+    while (m_buffer[pos] != 0)
+    {
+        const char c = m_buffer[pos];
+
+        if (inString)
+        {
+            if (c == '\\' && m_buffer[pos + 1] != 0)
+            {
+                // Skip the backslash together with the character it escapes. Handling
+                // only \' here would make the second backslash of '\\' swallow the
+                // closing quote and leave the scanner stuck inside the literal.
+                pos += 2;
+            }
+            else
+            {
+                if (c == '\'')
+                    inString = false;
+                pos++;
+            }
+        }
+        else if (c == '@')
+        {
+            while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
+                m_buffer[pos++] = ' ';
+        }
+        else
+        {
+            if (c == '\'')
+                inString = true;
+            pos++;
+        }
+    }
+}
+
+// Consumes a run of identifier characters starting at the current position and
+// returns it as a string.
+std::string CharmapReader::ReadConstant()
+{
+    const long nameStart = m_pos;
+
+    for (; IsIdentifierChar(m_buffer[m_pos]); ++m_pos)
+    {
+    }
+
+    const long nameLength = m_pos - nameStart;
+
+    return std::string(&m_buffer[nameStart], nameLength);
+}
+
+// Advances past spaces and tabs. Newlines are not skipped.
+void CharmapReader::SkipWhitespace()
+{
+    for (;;)
+    {
+        const char c = m_buffer[m_pos];
+
+        if (c != '\t' && c != ' ')
+            return;
+
+        m_pos++;
+    }
+}
+
+// Builds the character map by parsing every "lhs = bytes" entry of the given file.
+Charmap::Charmap(std::string filename)
+{
+    CharmapReader reader(filename);
+
+    // ReadLhs() reports LhsType::None once the file is exhausted.
+    for (Lhs lhs = reader.ReadLhs(); lhs.type != LhsType::None; lhs = reader.ReadLhs())
+    {
+        reader.ExpectEqualsSign();
+
+        std::string sequence = reader.ReadSequence();
+
+        // Store the byte sequence in the table that matches the kind of left-hand side.
+        if (lhs.type == LhsType::Char)
+            m_chars[lhs.code] = sequence;
+        else if (lhs.type == LhsType::Escape)
+            m_escapes[lhs.code] = sequence;
+        else if (lhs.type == LhsType::Constant)
+            m_constants[lhs.name] = sequence;
+
+        reader.ExpectEmptyRestOfLine();
+    }
+}
diff --git a/tools/preproc/charmap.h b/tools/preproc/charmap.h
new file mode 100644
index 000000000..0d752ac99
--- /dev/null
+++ b/tools/preproc/charmap.h
@@ -0,0 +1,64 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef CHARMAP_H
+#define CHARMAP_H
+
+#include <cstdint>
+#include <string>
+#include <map>
+#include <vector>
+
+// Lookup tables loaded from a charmap file. Each lookup returns the byte sequence
+// assigned to the key, or an empty string if the charmap has no entry for it.
+class Charmap
+{
+public:
+    // Parses the given charmap file; the definition is in charmap.cpp.
+    Charmap(std::string filename);
+
+    // Byte sequence for a Unicode code point.
+    std::string Char(std::int32_t code)
+    {
+        auto it = m_chars.find(code);
+
+        if (it == m_chars.end())
+            return std::string();
+
+        return it->second;
+    }
+
+    // Byte sequence for the backslash escape that uses the given character.
+    std::string Escape(unsigned char code)
+    {
+        // The table only covers ASCII. A larger value can never have an entry, and
+        // indexing with it would read past the end of the array.
+        if (code >= sizeof(m_escapes) / sizeof(m_escapes[0]))
+            return std::string();
+
+        return m_escapes[code];
+    }
+
+    // Byte sequence for a named constant.
+    std::string Constant(std::string identifier)
+    {
+        auto it = m_constants.find(identifier);
+
+        if (it == m_constants.end())
+            return std::string();
+
+        return it->second;
+    }
+private:
+    std::map<std::int32_t, std::string> m_chars;
+    std::string m_escapes[128];
+    std::map<std::string, std::string> m_constants;
+};
+
+#endif // CHARMAP_H
diff --git a/tools/preproc/preproc.cpp b/tools/preproc/preproc.cpp
new file mode 100644
index 000000000..7fc3f5c10
--- /dev/null
+++ b/tools/preproc/preproc.cpp
@@ -0,0 +1,83 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <stack>
+#include "preproc.h"
+#include "asm_file.h"
+#include "charmap.h"
+
+// Character map shared with the rest of the tool (declared extern in preproc.h).
+// Created in main() from the CHARMAP_FILE argument.
+Charmap* g_charmap;
+
+// Entry point. Usage: preproc ASM_FILE CHARMAP_FILE
+//
+// Loads the charmap, then processes ASM_FILE and writes the result to standard output.
+// Files are kept on a stack so that an include directive can suspend the current file,
+// process the included one and then resume where it left off.
+// Returns 0 when all input has been processed and 1 on a usage error.
+int main(int argc, char **argv)
+{
+    if (argc != 3)
+    {
+        // The trailing newline keeps the shell prompt off the usage line.
+        fprintf(stderr, "Usage: %s ASM_FILE CHARMAP_FILE\n", argv[0]);
+        return 1;
+    }
+
+    g_charmap = new Charmap(argv[2]);
+
+    std::stack<AsmFile> stack;
+
+    stack.push(AsmFile(argv[1]));
+
+    for (;;)
+    {
+        // Drop every finished file; the program is done once the outermost one ends.
+        while (stack.top().IsAtEnd())
+        {
+            stack.pop();
+
+            if (stack.empty())
+                return 0;
+            else
+                stack.top().OutputLocation();
+        }
+
+        Directive directive = stack.top().GetDirective();
+
+        switch (directive)
+        {
+        case Directive::Include:
+            // The path is read from the including file before the new file is pushed.
+            stack.push(AsmFile(stack.top().ReadPath()));
+            stack.top().OutputLocation();
+            break;
+        case Directive::String:
+            {
+                unsigned char s[kMaxStringLength];
+                int length = stack.top().ReadString(s);
+
+                // Emit the encoded string as one comma-separated ".byte" line.
+                printf("\t.byte ");
+                for (int i = 0; i < length; i++)
+                {
+                    printf("0x%02X", s[i]);
+
+                    if (i < length - 1)
+                        printf(", ");
+                }
+                putchar('\n');
+                break;
+            }
+        case Directive::Unknown:
+            stack.top().OutputLine();
+            break;
+        }
+    }
+}
diff --git a/tools/preproc/preproc.h b/tools/preproc/preproc.h
new file mode 100644
index 000000000..c9e1a8414
--- /dev/null
+++ b/tools/preproc/preproc.h
@@ -0,0 +1,52 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef PREPROC_H
+#define PREPROC_H
+
+#include "charmap.h"
+
+// FATAL_ERROR(format, ...): prints a printf-style message to stderr and exits with
+// status 1. The do/while(0) wrapper makes the macro behave like a single statement.
+// Two variants are needed because the way to pass an empty argument list differs
+// between compilers.
+#ifdef _MSC_VER
+
+// MSVC variant: plain __VA_ARGS__.
+#define FATAL_ERROR(format, ...) \
+do \
+{ \
+ fprintf(stderr, format, __VA_ARGS__); \
+ exit(1); \
+} while (0)
+
+#else
+
+// GNU variant: "##__VA_ARGS__" drops the preceding comma when no arguments are given.
+#define FATAL_ERROR(format, ...) \
+do \
+{ \
+ fprintf(stderr, format, ##__VA_ARGS__); \
+ exit(1); \
+} while (0)
+
+#endif // _MSC_VER
+
+// NOTE(review): kMaxPath is not used in preproc.cpp or charmap.cpp; presumably a limit
+// on include path length in asm_file.cpp - confirm.
+const int kMaxPath = 256;
+// Size of the buffer that main() hands to AsmFile::ReadString().
+const int kMaxStringLength = 256;
+// Maximum number of bytes CharmapReader::ReadSequence() accepts for one charmap entry.
+const unsigned long kMaxCharmapSequenceLength = 16;
+
+// Global character map; defined and initialized in preproc.cpp.
+extern Charmap* g_charmap;
+
+#endif // PREPROC_H
diff --git a/tools/preproc/utf8.cpp b/tools/preproc/utf8.cpp
new file mode 100644
index 000000000..0aed83f4a
--- /dev/null
+++ b/tools/preproc/utf8.cpp
@@ -0,0 +1,94 @@
+// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+//
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdint>
+#include "utf8.h"
+
+// Maps each of the 256 possible byte values to a character class (0..11).
+// DecodeUtf8() uses the class both as the column index into s_transitionTable and,
+// for the first byte of a sequence, as the shift that derives the payload mask.
+static const unsigned char s_byteTypeTable[] =
+{
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+};
+
+// Decoder states. Each value is the offset of that state's 12-entry row in
+// s_transitionTable, so "state + type" indexes the table directly.
+// DecodeUtf8() starts in s0 and stops successfully when it returns to s0;
+// reaching s1 means the byte sequence is invalid.
+const unsigned char s0 = 0 * 12;
+const unsigned char s1 = 1 * 12;
+const unsigned char s2 = 2 * 12;
+const unsigned char s3 = 3 * 12;
+const unsigned char s4 = 4 * 12;
+const unsigned char s5 = 5 * 12;
+const unsigned char s6 = 6 * 12;
+const unsigned char s7 = 7 * 12;
+const unsigned char s8 = 8 * 12;
+
+// State transition table, stored row by row: one row per state (named in the trailing
+// comment) and one column per character class from s_byteTypeTable. The entry at
+// [state + type] is the next state.
+static const unsigned char s_transitionTable[] =
+{
+ s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0
+ s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1
+ s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2
+ s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3
+ s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4
+ s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5
+ s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6
+ s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7
+ s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8
+};
+
+// Decodes the UTF-8 encoded Unicode code point that starts at "s".
+// "s" is passed by value, so the caller's pointer is never moved; the caller advances
+// by the returned encodingLength instead.
+// If the encoding is valid, "code" is the code point and "encodingLength" is the number
+// of bytes it occupies.
+// If the encoding is not valid, "code" is -1 and "encodingLength" is 0.
+UnicodeChar DecodeUtf8(const char* s)
+{
+    UnicodeChar unicodeChar;
+    unicodeChar.code = 0;
+    unicodeChar.encodingLength = 0;
+
+    int state = s0;
+    auto start = s;
+
+    do
+    {
+        unsigned char byte = *s++;
+        int type = s_byteTypeTable[byte];
+
+        if (state == s0)
+        {
+            // First byte: the character class doubles as the shift that masks off
+            // everything but the payload bits.
+            unicodeChar.code = (0xFF >> type) & byte;
+        }
+        else
+        {
+            // Following bytes each contribute their low six bits.
+            unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F);
+        }
+
+        state = s_transitionTable[state + type];
+
+        if (state == s1)
+        {
+            // Reject state: report failure with a well-defined length.
+            unicodeChar.code = -1;
+            unicodeChar.encodingLength = 0;
+            return unicodeChar;
+        }
+    } while (state != s0);
+
+    unicodeChar.encodingLength = static_cast<int>(s - start);
+
+    return unicodeChar;
+}
diff --git a/tools/preproc/utf8.h b/tools/preproc/utf8.h
new file mode 100644
index 000000000..259de67c5
--- /dev/null
+++ b/tools/preproc/utf8.h
@@ -0,0 +1,34 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef UTF8_H
+#define UTF8_H
+
+#include <cstdint>
+
+// Result of DecodeUtf8().
+struct UnicodeChar
+{
+ std::int32_t code; // decoded code point, or -1 if the byte sequence was invalid
+ int encodingLength; // number of bytes decoded; only meaningful when code is not -1
+};
+
+// Decodes one UTF-8 encoded code point starting at "s"; defined in utf8.cpp.
+UnicodeChar DecodeUtf8(const char* s);
+
+#endif // UTF8_H