summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/preproc/Makefile6
-rw-r--r--tools/preproc/asm_file.cpp168
-rw-r--r--tools/preproc/asm_file.h4
-rw-r--r--tools/preproc/c_file.cpp199
-rw-r--r--tools/preproc/c_file.h50
-rw-r--r--tools/preproc/charmap.cpp3
-rw-r--r--tools/preproc/preproc.cpp70
-rw-r--r--tools/preproc/preproc.h4
-rw-r--r--tools/preproc/string_parser.cpp355
-rw-r--r--tools/preproc/string_parser.h55
10 files changed, 744 insertions, 170 deletions
diff --git a/tools/preproc/Makefile b/tools/preproc/Makefile
index f504e45bd..24f60e3f6 100644
--- a/tools/preproc/Makefile
+++ b/tools/preproc/Makefile
@@ -2,9 +2,11 @@ CXX := g++
CXXFLAGS := -std=c++14 -O2 -Wall -Wno-switch
-SRCS := asm_file.cpp charmap.cpp preproc.cpp utf8.cpp
+SRCS := asm_file.cpp c_file.cpp charmap.cpp preproc.cpp string_parser.cpp \
+ utf8.cpp
-HEADERS := asm_file.h char_util.h charmap.h preproc.h utf8.h
+HEADERS := asm_file.h c_file.h char_util.h charmap.h preproc.h string_parser.h \
+ utf8.h
.PHONY: clean
diff --git a/tools/preproc/asm_file.cpp b/tools/preproc/asm_file.cpp
index b05ebab3f..b843d640b 100644
--- a/tools/preproc/asm_file.cpp
+++ b/tools/preproc/asm_file.cpp
@@ -24,6 +24,7 @@
#include "asm_file.h"
#include "char_util.h"
#include "utf8.h"
+#include "string_parser.h"
AsmFile::AsmFile(std::string filename) : m_filename(filename)
{
@@ -36,6 +37,9 @@ AsmFile::AsmFile(std::string filename) : m_filename(filename)
m_size = std::ftell(fp);
+ if (m_size < 0)
+ FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());
+
m_buffer = new char[m_size + 1];
std::rewind(fp);
@@ -246,169 +250,29 @@ std::string AsmFile::ReadPath()
return std::string(&m_buffer[startPos], length);
}
-// Reads a charmap char or escape sequence.
-std::string AsmFile::ReadCharOrEscape()
-{
- std::string sequence;
-
- bool isEscape = (m_buffer[m_pos] == '\\');
-
- if (isEscape)
- {
- m_pos++;
-
- if (m_buffer[m_pos] == '"')
- {
- sequence = g_charmap->Char('"');
-
- if (sequence.length() == 0)
- RaiseError("no mapping exists for double quote");
-
- return sequence;
- }
- else if (m_buffer[m_pos] == '\\')
- {
- sequence = g_charmap->Char('\\');
-
- if (sequence.length() == 0)
- RaiseError("no mapping exists for backslash");
-
- return sequence;
- }
- }
-
- unsigned char c = m_buffer[m_pos];
-
- if (c == 0)
- {
- if (m_pos >= m_size)
- RaiseError("unexpected EOF in UTF-8 string");
- else
- RaiseError("unexpected null character in UTF-8 string");
- }
-
- if (IsAscii(c) && !IsAsciiPrintable(c))
- RaiseError("unexpected character U+%X in UTF-8 string", c);
-
- UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
- m_pos += unicodeChar.encodingLength;
- std::int32_t code = unicodeChar.code;
-
- if (code == -1)
- RaiseError("invalid encoding in UTF-8 string");
-
- if (isEscape && code >= 128)
- RaiseError("escapes using non-ASCII characters are invalid");
-
- sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code);
-
- if (sequence.length() == 0)
- {
- if (isEscape)
- RaiseError("unknown escape '\\%c'", code);
- else
- RaiseError("unknown character U+%X", code);
- }
-
- return sequence;
-}
-
-// Reads a charmap constant, i.e. "{FOO}".
-std::string AsmFile::ReadBracketedConstants()
-{
- std::string totalSequence;
-
- m_pos++; // Assume we're on the left curly bracket.
-
- while (m_buffer[m_pos] != '}')
- {
- SkipWhitespace();
-
- if (IsIdentifierStartingChar(m_buffer[m_pos]))
- {
- long startPos = m_pos;
-
- m_pos++;
-
- while (IsIdentifierChar(m_buffer[m_pos]))
- m_pos++;
-
- std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos));
-
- if (sequence.length() == 0)
- {
- m_buffer[m_pos] = 0;
- RaiseError("unknown constant '%s'", &m_buffer[startPos]);
- }
-
- totalSequence += sequence;
- }
- else if (IsAsciiDigit(m_buffer[m_pos]))
- {
- int value = ReadInteger(255);
-
- if (value == -1)
- RaiseError("integers within curly brackets cannot exceed 255");
-
- totalSequence += (char)value;
- }
- else if (m_buffer[m_pos] == 0)
- {
- if (m_pos >= m_size)
- RaiseError("unexpected EOF after left curly bracket");
- else
- RaiseError("unexpected null character within curly brackets");
- }
- else
- {
- if (IsAsciiPrintable(m_buffer[m_pos]))
- RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]);
- else
- RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]);
- }
- }
-
- m_pos++; // Go past the right curly bracket.
-
- return totalSequence;
-}
-
// Reads a charmap string.
int AsmFile::ReadString(unsigned char* s)
{
SkipWhitespace();
- if (m_buffer[m_pos] != '"')
- RaiseError("expected UTF-8 string literal");
-
- m_pos++;
-
- int length = 0;
+ int length;
+ StringParser stringParser(m_buffer, m_size);
- while (m_buffer[m_pos] != '"')
+ try
{
- std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape();
-
- for (const char& c : sequence)
- {
- if (length == kMaxStringLength)
- RaiseError("mapped string longer than %d bytes", length);
-
- s[length++] = c;
- }
+ m_pos += stringParser.ParseString(m_pos, s, length);
+ }
+ catch (std::runtime_error e)
+ {
+ RaiseError(e.what());
}
-
- m_pos++; // Go past the right quote.
SkipWhitespace();
if (ConsumeComma())
{
SkipWhitespace();
- int padLength = ReadInteger(kMaxStringLength);
-
- if (padLength == -1)
- RaiseError("pad length greater than maximum length (%d)", kMaxStringLength);
+ int padLength = ReadPadLength();
while (length < padLength)
{
@@ -452,7 +316,7 @@ static int ConvertDigit(char c, int radix)
}
// Reads an integer. If the integer is greater than maxValue, it returns -1.
-int AsmFile::ReadInteger(int maxValue)
+int AsmFile::ReadPadLength()
{
if (!IsAsciiDigit(m_buffer[m_pos]))
RaiseError("expected integer");
@@ -472,8 +336,8 @@ int AsmFile::ReadInteger(int maxValue)
{
n = n * radix + digit;
- if (n > (unsigned)maxValue)
- return -1;
+ if (n > kMaxStringLength)
+ RaiseError("pad length greater than maximum length (%d)", kMaxStringLength);
m_pos++;
}
diff --git a/tools/preproc/asm_file.h b/tools/preproc/asm_file.h
index 398c46a36..335dbab4b 100644
--- a/tools/preproc/asm_file.h
+++ b/tools/preproc/asm_file.h
@@ -56,11 +56,9 @@ private:
std::string m_filename;
bool ConsumeComma();
- int ReadInteger(int maxValue);
+ int ReadPadLength();
void RemoveComments();
bool CheckForDirective(std::string name);
- std::string ReadCharOrEscape();
- std::string ReadBracketedConstants();
void SkipWhitespace();
void ExpectEmptyRestOfLine();
void ReportDiagnostic(const char* type, const char* format, std::va_list args);
diff --git a/tools/preproc/c_file.cpp b/tools/preproc/c_file.cpp
new file mode 100644
index 000000000..1e4dea359
--- /dev/null
+++ b/tools/preproc/c_file.cpp
@@ -0,0 +1,199 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdio>
+#include <cstdarg>
+#include "preproc.h"
+#include "c_file.h"
+#include "char_util.h"
+#include "utf8.h"
+#include "string_parser.h"
+
+// Loads the whole of the named file into a NUL-terminated buffer and places
+// the read position at the start of line 1. Failures to open, measure or read
+// the file are reported through FATAL_ERROR.
+CFile::CFile(std::string filename) : m_filename(filename)
+{
+    FILE *fp = std::fopen(filename.c_str(), "rb");
+
+    if (fp == NULL)
+        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());
+
+    // Seek to the end so that ftell() reports the size of the file.
+    if (std::fseek(fp, 0, SEEK_END) != 0)
+        FATAL_ERROR("Failed to seek in \"%s\".\n", filename.c_str());
+
+    m_size = std::ftell(fp);
+
+    if (m_size < 0)
+        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());
+
+    // One extra byte for the terminating NUL written below.
+    m_buffer = new char[m_size + 1];
+
+    std::rewind(fp);
+
+    // fread() returns 0 for a zero-sized request, so an empty file must not
+    // be mistaken for a failed read.
+    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
+        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());
+
+    m_buffer[m_size] = 0;
+
+    std::fclose(fp);
+
+    m_pos = 0;
+    m_lineNum = 1;
+}
+
+// Move constructor: takes over the other object's buffer and parse state and
+// nulls the source's buffer pointer, so the source's destructor has nothing
+// to free.
+CFile::CFile(CFile&& other)
+    : m_buffer(other.m_buffer),
+      m_pos(other.m_pos),
+      m_size(other.m_size),
+      m_lineNum(other.m_lineNum),
+      m_filename(std::move(other.m_filename))
+{
+    other.m_buffer = nullptr;
+}
+
+// Releases the file buffer. After a move the pointer is null, which
+// delete[] accepts as a no-op.
+CFile::~CFile()
+{
+ delete[] m_buffer;
+}
+
+// Copies the file to standard output, replacing every string literal that is
+// prefixed with an underscore (_"...") by a brace-enclosed list of the bytes
+// StringParser produces for it. Further plain "..." literals that follow,
+// separated only by spaces, tabs or newlines, are appended to the same list.
+// The list is closed with "0xFF }" as soon as any other character is seen.
+// Ordinary string and character literals are copied through unchanged.
+void CFile::Preproc()
+{
+    bool inConcatMode = false; // true while a converted byte list is still open
+    char stringChar = 0;       // quote character of the ordinary literal being copied, or 0
+
+    while (m_pos < m_size)
+    {
+        if (stringChar)
+        {
+            if (m_buffer[m_pos] == stringChar)
+            {
+                std::putchar(stringChar);
+                m_pos++;
+                stringChar = 0;
+            }
+            else if (m_buffer[m_pos] == '\\' && m_pos + 1 < m_size)
+            {
+                // Copy a backslash together with whatever it escapes. Handling
+                // only an escaped quote would misread "\\" as an escaped
+                // closing quote and never leave the literal.
+                std::putchar('\\');
+                std::putchar(m_buffer[m_pos + 1]);
+                m_pos += 2;
+            }
+            else
+            {
+                std::putchar(m_buffer[m_pos]);
+                m_pos++;
+            }
+        }
+        else
+        {
+            if (inConcatMode ? m_buffer[m_pos] == '"'
+                             : (m_buffer[m_pos] == '_' && m_buffer[m_pos + 1] == '"'))
+            {
+                if (!inConcatMode)
+                    m_pos++; // skip past underscore
+
+                unsigned char s[kMaxStringLength];
+                int length = 0;
+                StringParser stringParser(m_buffer, m_size);
+
+                try
+                {
+                    m_pos += stringParser.ParseString(m_pos, s, length);
+                }
+                catch (const std::runtime_error& e)
+                {
+                    // The message is already formatted and may contain '%',
+                    // so pass it as data rather than as the format string.
+                    RaiseError("%s", e.what());
+                }
+
+                if (!inConcatMode)
+                    std::printf("{ ");
+
+                inConcatMode = true;
+
+                for (int i = 0; i < length; i++)
+                    std::printf("0x%02X, ", s[i]);
+            }
+            else
+            {
+                char c = m_buffer[m_pos++];
+
+                // Normalize CR and CRLF line endings to a single LF.
+                if (c == '\r')
+                {
+                    if (m_buffer[m_pos] == '\n')
+                        m_pos++;
+
+                    c = '\n';
+                }
+
+                // Anything other than whitespace ends the run of adjacent
+                // literals, so terminate and close the byte list first.
+                if ((c != ' ' && c != '\t' && c != '\n') && inConcatMode)
+                {
+                    std::printf("0xFF }");
+                    inConcatMode = false;
+                }
+
+                std::putchar(c);
+
+                if (c == '\n')
+                    m_lineNum++;
+                else if (c == '"')
+                    stringChar = '"';
+                else if (c == '\'')
+                    stringChar = '\''; // test the character just copied, not the one after it
+            }
+        }
+    }
+
+    if (inConcatMode)
+    {
+        std::printf("0xFF }");
+        RaiseWarning("string at end of file");
+    }
+}
+
+// Formats a printf-style message and writes it to stderr in the form
+// "<filename>:<line>: <type>: <message>". Messages longer than the local
+// 1024-byte buffer are truncated by vsnprintf.
+void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args)
+{
+    char message[1024];
+
+    std::vsnprintf(message, sizeof(message), format, args);
+    std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, message);
+}
+
+// Forwards the variadic arguments of the enclosing function to
+// ReportDiagnostic with the given diagnostic type string. It may only be
+// expanded inside a variadic member function whose last named parameter is
+// called `format`, because va_start() refers to that name directly.
+#define DO_REPORT(type) \
+do \
+{ \
+ std::va_list args; \
+ va_start(args, format); \
+ ReportDiagnostic(type, format, args); \
+ va_end(args); \
+} while (0)
+
+// Reports an error diagnostic and terminates the program.
+void CFile::RaiseError(const char* format, ...)
+{
+ DO_REPORT("error");
+ std::exit(1);
+}
+
+// Emits a "warning" diagnostic built from the printf-style arguments and
+// returns to the caller.
+void CFile::RaiseWarning(const char* format, ...)
+{
+    std::va_list args;
+    va_start(args, format);
+    ReportDiagnostic("warning", format, args);
+    va_end(args);
+}
diff --git a/tools/preproc/c_file.h b/tools/preproc/c_file.h
new file mode 100644
index 000000000..b6041cc00
--- /dev/null
+++ b/tools/preproc/c_file.h
@@ -0,0 +1,50 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef C_FILE_H
+#define C_FILE_H
+
+#include <cstdarg>
+#include <cstdint>
+#include <string>
+#include "preproc.h"
+
+// A C source file held in memory. Preproc() writes a transformed copy of it
+// to standard output. Instances can be moved but not copied.
+class CFile
+{
+public:
+ // Reads the named file into memory.
+ CFile(std::string filename);
+ // Takes over the buffer and parse state of another instance.
+ CFile(CFile&& other);
+ CFile(const CFile&) = delete;
+ ~CFile();
+ // Writes the preprocessed file to standard output.
+ void Preproc();
+
+private:
+ // File contents followed by a terminating NUL; allocated with new[].
+ char* m_buffer;
+ // Offset in m_buffer of the next character to process.
+ long m_pos;
+ // Size of the file in bytes, not counting the terminating NUL.
+ long m_size;
+ // Current line number, starting at 1; printed in diagnostics.
+ long m_lineNum;
+ // Path the file was opened with; printed in diagnostics.
+ std::string m_filename;
+
+ // Writes a formatted "<file>:<line>: <type>: <message>" line to stderr.
+ void ReportDiagnostic(const char* type, const char* format, std::va_list args);
+ // Reports an error and exits the program.
+ void RaiseError(const char* format, ...);
+ // Reports a warning and continues.
+ void RaiseWarning(const char* format, ...);
+};
+
+#endif // C_FILE_H
diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp
index 573981694..a7bedfe26 100644
--- a/tools/preproc/charmap.cpp
+++ b/tools/preproc/charmap.cpp
@@ -76,6 +76,9 @@ CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
m_size = std::ftell(fp);
+ if (m_size < 0)
+ FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());
+
m_buffer = new char[m_size + 1];
std::rewind(fp);
diff --git a/tools/preproc/preproc.cpp b/tools/preproc/preproc.cpp
index 4f216f23a..1dd6808c3 100644
--- a/tools/preproc/preproc.cpp
+++ b/tools/preproc/preproc.cpp
@@ -18,35 +18,29 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
+#include <string>
#include <stack>
#include "preproc.h"
#include "asm_file.h"
+#include "c_file.h"
#include "charmap.h"
Charmap* g_charmap;
-int main(int argc, char **argv)
+void PreprocAsmFile(std::string filename)
{
- if (argc != 3)
- {
- fprintf(stderr, "Usage: %s ASM_FILE CHARMAP_FILE", argv[0]);
- return 1;
- }
-
- g_charmap = new Charmap(argv[2]);
-
std::stack<AsmFile> stack;
- stack.push(AsmFile(argv[1]));
+ stack.push(AsmFile(filename));
for (;;)
{
while (stack.top().IsAtEnd())
{
stack.pop();
-
+
if (stack.empty())
- return 0;
+ return;
else
stack.top().OutputLocation();
}
@@ -84,3 +78,55 @@ int main(int argc, char **argv)
}
}
}
+
+// Loads the given C source file and runs the C preprocessing pass over it.
+void PreprocCFile(std::string filename)
+{
+    CFile source(filename);
+
+    source.Preproc();
+}
+
+// Returns a pointer to the text after the last '.' of the file name, or
+// nullptr when there is no usable extension: no '.' after the last directory
+// separator, a '.' only as the very first character of filename, or a
+// trailing '.'. The returned pointer points into filename.
+char* GetFileExtension(char* filename)
+{
+    char* extension = filename;
+
+    // Find the end of the string.
+    while (*extension != 0)
+        extension++;
+
+    // Walk back to the last '.', but never past a directory separator, so a
+    // dot in a directory name ("build.d/Makefile") is not taken as the start
+    // of the extension.
+    while (extension > filename && *extension != '.')
+    {
+        if (*extension == '/' || *extension == '\\')
+            return nullptr;
+
+        extension--;
+    }
+
+    if (extension == filename)
+        return nullptr;
+
+    extension++;
+
+    if (*extension == 0)
+        return nullptr;
+
+    return extension;
+}
+
+// Entry point. Expects a source file and a charmap file on the command line,
+// loads the charmap into g_charmap and dispatches on the source file's
+// extension: "s" goes to PreprocAsmFile, "c" and "i" go to PreprocCFile.
+// Returns 1 on a usage error and 0 after a successful run.
+int main(int argc, char **argv)
+{
+    if (argc != 3)
+    {
+        // Terminate the line like every other diagnostic does.
+        fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE\n", argv[0]);
+        return 1;
+    }
+
+    g_charmap = new Charmap(argv[2]);
+
+    char* extension = GetFileExtension(argv[1]);
+
+    if (!extension)
+        FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]);
+
+    // Only single-character extensions are recognized.
+    if ((extension[0] == 's') && extension[1] == 0)
+        PreprocAsmFile(argv[1]);
+    else if ((extension[0] == 'c' || extension[0] == 'i') && extension[1] == 0)
+        PreprocCFile(argv[1]);
+    else
+        FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension);
+
+    return 0;
+}
diff --git a/tools/preproc/preproc.h b/tools/preproc/preproc.h
index 926748efd..515f64e07 100644
--- a/tools/preproc/preproc.h
+++ b/tools/preproc/preproc.h
@@ -21,6 +21,8 @@
#ifndef PREPROC_H
#define PREPROC_H
+#include <cstdio>
+#include <cstdlib>
#include "charmap.h"
#ifdef _MSC_VER
@@ -44,7 +46,7 @@ do \
#endif // _MSC_VER
const int kMaxPath = 256;
-const int kMaxStringLength = 256;
+const int kMaxStringLength = 1024;
const unsigned long kMaxCharmapSequenceLength = 16;
extern Charmap* g_charmap;
diff --git a/tools/preproc/string_parser.cpp b/tools/preproc/string_parser.cpp
new file mode 100644
index 000000000..b383f2f4f
--- /dev/null
+++ b/tools/preproc/string_parser.cpp
@@ -0,0 +1,355 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdio>
+#include <cstdarg>
+#include <stdexcept>
+#include "preproc.h"
+#include "string_parser.h"
+#include "char_util.h"
+#include "utf8.h"
+
+// Reads one character or backslash escape at the current position and
+// returns the byte sequence the charmap assigns to it, leaving m_pos just
+// past the consumed input. Raises an error for a NUL byte, a non-printable
+// ASCII character, invalid UTF-8, a non-ASCII escape, or a character or
+// escape for which the charmap returns an empty sequence.
+std::string StringParser::ReadCharOrEscape()
+{
+    std::string sequence;
+
+    bool isEscape = (m_buffer[m_pos] == '\\');
+
+    if (isEscape)
+    {
+        m_pos++;
+
+        // \" and \\ are looked up as ordinary characters, not as escapes.
+        if (m_buffer[m_pos] == '"')
+        {
+            sequence = g_charmap->Char('"');
+
+            if (sequence.length() == 0)
+                RaiseError("no mapping exists for double quote");
+
+            // Consume the quote; otherwise ParseString would see it next and
+            // take it for the end of the literal.
+            m_pos++;
+
+            return sequence;
+        }
+        else if (m_buffer[m_pos] == '\\')
+        {
+            sequence = g_charmap->Char('\\');
+
+            if (sequence.length() == 0)
+                RaiseError("no mapping exists for backslash");
+
+            // Consume the second backslash; otherwise it would be read again
+            // as the start of another escape.
+            m_pos++;
+
+            return sequence;
+        }
+    }
+
+    unsigned char c = m_buffer[m_pos];
+
+    if (c == 0)
+    {
+        // A NUL at or past m_size is the buffer terminator, i.e. end of file.
+        if (m_pos >= m_size)
+            RaiseError("unexpected EOF in UTF-8 string");
+        else
+            RaiseError("unexpected null character in UTF-8 string");
+    }
+
+    if (IsAscii(c) && !IsAsciiPrintable(c))
+        RaiseError("unexpected character U+%X in UTF-8 string", c);
+
+    UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
+    m_pos += unicodeChar.encodingLength;
+    std::int32_t code = unicodeChar.code;
+
+    if (code == -1)
+        RaiseError("invalid encoding in UTF-8 string");
+
+    if (isEscape && code >= 128)
+        RaiseError("escapes using non-ASCII characters are invalid");
+
+    sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code);
+
+    if (sequence.length() == 0)
+    {
+        if (isEscape)
+            RaiseError("unknown escape '\\%c'", code);
+        else
+            RaiseError("unknown character U+%X", code);
+    }
+
+    return sequence;
+}
+
+// Reads a brace-enclosed list such as "{FOO 0x12 BAR}" and returns the
+// concatenated bytes of its items. An identifier is looked up as a charmap
+// constant; an integer literal is emitted least significant byte first, using
+// the byte count reported by ReadInteger. Items may be separated, preceded
+// and followed by spaces and tabs. Raises an error for an unknown constant,
+// an unexpected character, or input that ends before the closing '}'.
+std::string StringParser::ReadBracketedConstants()
+{
+    std::string totalSequence;
+
+    m_pos++; // Assume we're on the left curly bracket.
+
+    SkipWhitespace();
+
+    while (m_buffer[m_pos] != '}')
+    {
+        if (IsIdentifierStartingChar(m_buffer[m_pos]))
+        {
+            long startPos = m_pos;
+
+            m_pos++;
+
+            while (IsIdentifierChar(m_buffer[m_pos]))
+                m_pos++;
+
+            // Keep the name in its own string so that the error message does
+            // not need the shared file buffer to be overwritten with a NUL.
+            std::string name(&m_buffer[startPos], m_pos - startPos);
+            std::string sequence = g_charmap->Constant(name);
+
+            if (sequence.length() == 0)
+                RaiseError("unknown constant '%s'", name.c_str());
+
+            totalSequence += sequence;
+        }
+        else if (IsAsciiDigit(m_buffer[m_pos]))
+        {
+            Integer integer = ReadInteger();
+
+            // Emit integer.size bytes in little-endian order.
+            for (int i = 0; i < integer.size; i++)
+                totalSequence += static_cast<char>(static_cast<unsigned char>(integer.value >> (8 * i)));
+        }
+        else if (m_buffer[m_pos] == 0)
+        {
+            // A NUL at or past m_size is the buffer terminator, i.e. end of file.
+            if (m_pos >= m_size)
+                RaiseError("unexpected EOF after left curly bracket");
+            else
+                RaiseError("unexpected null character within curly brackets");
+        }
+        else
+        {
+            if (IsAsciiPrintable(m_buffer[m_pos]))
+                RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]);
+            else
+                // Go through unsigned char so a byte >= 0x80 prints as two
+                // hex digits instead of a sign-extended value.
+                RaiseError("unexpected character '\\x%02X' within curly brackets",
+                           static_cast<unsigned>(static_cast<unsigned char>(m_buffer[m_pos])));
+        }
+
+        // Skip separators after every item so that whitespace in front of
+        // the closing bracket is accepted as well.
+        SkipWhitespace();
+    }
+
+    m_pos++; // Go past the right curly bracket.
+
+    return totalSequence;
+}
+
+// Parses the string literal whose opening double quote is at srcPos. The
+// mapped bytes are written to dest and their count to destLength. Returns
+// the number of source characters consumed, both quotes included. Raises an
+// error if srcPos is not on a double quote or if the mapped output would
+// exceed kMaxStringLength bytes.
+int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength)
+{
+    m_pos = srcPos;
+
+    if (m_buffer[m_pos] != '"')
+        RaiseError("expected UTF-8 string literal");
+
+    const long literalStart = m_pos;
+
+    m_pos++; // Step over the left quote.
+    destLength = 0;
+
+    for (;;)
+    {
+        const char next = m_buffer[m_pos];
+
+        if (next == '"')
+            break;
+
+        const std::string mapped = (next == '{') ? ReadBracketedConstants() : ReadCharOrEscape();
+
+        for (std::string::size_type i = 0; i < mapped.length(); i++)
+        {
+            if (destLength == kMaxStringLength)
+                RaiseError("mapped string longer than %d bytes", destLength);
+
+            dest[destLength++] = mapped[i];
+        }
+    }
+
+    m_pos++; // Step over the right quote.
+
+    return m_pos - literalStart;
+}
+
+// Formats the printf-style message into a local 1024-byte buffer (longer
+// messages are truncated by vsnprintf) and throws it as a std::runtime_error.
+void StringParser::RaiseError(const char* format, ...)
+{
+    char message[1024];
+
+    std::va_list args;
+    va_start(args, format);
+    std::vsnprintf(message, sizeof(message), format, args);
+    va_end(args);
+
+    throw std::runtime_error(message);
+}
+
+// Returns the numeric value of the digit character c in the given radix, or
+// -1 if c is not a digit or its value is not below radix. Letters A-F and
+// a-f stand for the values 10 to 15.
+static int ConvertDigit(char c, int radix)
+{
+    int value = -1;
+
+    if ('0' <= c && c <= '9')
+        value = c - '0';
+    else if ('A' <= c && c <= 'F')
+        value = c - 'A' + 10;
+    else if ('a' <= c && c <= 'f')
+        value = c - 'a' + 10;
+
+    if (value < 0 || value >= radix)
+        return -1;
+
+    return value;
+}
+
+// Advances m_pos past every following character that is a valid digit in
+// the given radix.
+void StringParser::SkipRestOfInteger(int radix)
+{
+    // ConvertDigit yields -1 for a non-digit and a non-negative value otherwise.
+    for (; ConvertDigit(m_buffer[m_pos], radix) >= 0; m_pos++)
+    {
+    }
+}
+
+// Reads a decimal integer literal with an optional size suffix: 'H' selects
+// a 2-byte halfword and 'W' a 4-byte word. Without a suffix the size is the
+// smallest of 1, 2 or 4 bytes that holds the value. Raises an error if the
+// value does not fit in 32 bits, or in 16 bits when 'H' is given.
+StringParser::Integer StringParser::ReadDecimal()
+{
+    const int radix = 10;
+    std::uint64_t n = 0;
+    int digit;
+    std::uint64_t max = UINT32_MAX;
+    long startPos = m_pos;
+
+    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
+    {
+        n = n * radix + digit;
+
+        // UINT32_MAX itself is representable, so only larger values are
+        // rejected. Checking after every digit keeps n far below the range
+        // of the 64-bit accumulator.
+        if (n > max)
+        {
+            // Consume the remaining digits so the message shows the whole literal.
+            SkipRestOfInteger(radix);
+
+            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
+            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
+        }
+
+        m_pos++;
+    }
+
+    int size;
+
+    if (m_buffer[m_pos] == 'H')
+    {
+        if (n >= 0x10000)
+        {
+            RaiseError("%lu is too large to be a halfword", (unsigned long)n);
+        }
+
+        size = 2;
+        m_pos++;
+    }
+    else if (m_buffer[m_pos] == 'W')
+    {
+        size = 4;
+        m_pos++;
+    }
+    else
+    {
+        if (n >= 0x10000)
+            size = 4;
+        else if (n >= 0x100)
+            size = 2;
+        else
+            size = 1;
+    }
+
+    return{ static_cast<std::uint32_t>(n), size };
+}
+
+// Reads the digits of a hexadecimal literal; ReadInteger has already
+// consumed the "0x" prefix. The number of digits fixes the size: 2 digits
+// give 1 byte, 4 give 2 bytes and 8 give 4 bytes. Raises an error for any
+// other digit count or for a value that does not fit in 32 bits.
+StringParser::Integer StringParser::ReadHex()
+{
+    const int radix = 16;
+    std::uint64_t n = 0;
+    int digit;
+    std::uint64_t max = UINT32_MAX;
+    long startPos = m_pos;
+
+    while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1)
+    {
+        n = n * radix + digit;
+
+        // 0xFFFFFFFF is a valid 32-bit value, so only larger values (which
+        // need more than 8 digits) are rejected here.
+        if (n > max)
+        {
+            // Consume the remaining digits so the message shows the whole literal.
+            SkipRestOfInteger(radix);
+
+            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
+            RaiseError("integer literal \"%s\" is too large", intLiteral.c_str());
+        }
+
+        m_pos++;
+    }
+
+    int length = m_pos - startPos;
+    int size = 0;
+
+    switch (length)
+    {
+    case 2:
+        size = 1;
+        break;
+    case 4:
+        size = 2;
+        break;
+    case 8:
+        size = 4;
+        break;
+    default:
+        {
+            std::string intLiteral(m_buffer + startPos, m_pos - startPos);
+            RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str());
+        }
+    }
+
+    return{ static_cast<std::uint32_t>(n), size };
+}
+
+// Reads an integer literal at the current position. A literal that starts
+// with "0x" is parsed by ReadHex after the prefix is skipped; anything else
+// is parsed by ReadDecimal. Raises an error if the current character is not
+// an ASCII digit.
+StringParser::Integer StringParser::ReadInteger()
+{
+    if (!IsAsciiDigit(m_buffer[m_pos]))
+        RaiseError("expected integer");
+
+    const bool hasHexPrefix = (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x');
+
+    if (!hasHexPrefix)
+        return ReadDecimal();
+
+    m_pos += 2; // Step over "0x".
+    return ReadHex();
+}
+
+// Advances m_pos past any run of spaces and tabs. Newlines are not skipped.
+void StringParser::SkipWhitespace()
+{
+    for (;;)
+    {
+        const char c = m_buffer[m_pos];
+
+        if (c != ' ' && c != '\t')
+            return;
+
+        m_pos++;
+    }
+}
diff --git a/tools/preproc/string_parser.h b/tools/preproc/string_parser.h
new file mode 100644
index 000000000..abd2bfe9a
--- /dev/null
+++ b/tools/preproc/string_parser.h
@@ -0,0 +1,55 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef STRING_PARSER_H
+#define STRING_PARSER_H
+
+#include <cstdint>
+#include <string>
+#include "preproc.h"
+
+// Converts string literals found in a caller-supplied text buffer into
+// charmap byte sequences. Errors are reported by throwing
+// std::runtime_error (see RaiseError).
+class StringParser
+{
+public:
+ // Stores the buffer pointer and size; the buffer is not copied, and this
+ // class declares no destructor that would free it.
+ StringParser(char* buffer, long size) : m_buffer(buffer), m_size(size), m_pos(0) {}
+ // Parses the literal starting at srcPos into dest, stores the number of
+ // bytes written in destLength and returns the number of source
+ // characters consumed.
+ int ParseString(long srcPos, unsigned char* dest, int &destLength);
+
+private:
+ // An integer literal together with the number of bytes to emit for it.
+ struct Integer
+ {
+ std::uint32_t value;
+ // 1, 2 or 4.
+ int size;
+ };
+
+ // Text being parsed, supplied by the caller.
+ char* m_buffer;
+ // Size passed to the constructor; a NUL at or beyond this offset is
+ // reported as end of file rather than as a stray null character.
+ long m_size;
+ // Offset in m_buffer of the next character to read.
+ long m_pos;
+
+ Integer ReadInteger();
+ Integer ReadDecimal();
+ Integer ReadHex();
+ std::string ReadCharOrEscape();
+ std::string ReadBracketedConstants();
+ void SkipWhitespace();
+ void SkipRestOfInteger(int radix);
+ // Formats a printf-style message and throws it as std::runtime_error.
+ void RaiseError(const char* format, ...);
+};
+
+#endif // STRING_PARSER_H