diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/preproc/Makefile | 6 | ||||
-rw-r--r-- | tools/preproc/asm_file.cpp | 168 | ||||
-rw-r--r-- | tools/preproc/asm_file.h | 4 | ||||
-rw-r--r-- | tools/preproc/c_file.cpp | 199 | ||||
-rw-r--r-- | tools/preproc/c_file.h | 50 | ||||
-rw-r--r-- | tools/preproc/charmap.cpp | 3 | ||||
-rw-r--r-- | tools/preproc/preproc.cpp | 70 | ||||
-rw-r--r-- | tools/preproc/preproc.h | 4 | ||||
-rw-r--r-- | tools/preproc/string_parser.cpp | 355 | ||||
-rw-r--r-- | tools/preproc/string_parser.h | 55 |
10 files changed, 744 insertions, 170 deletions
diff --git a/tools/preproc/Makefile b/tools/preproc/Makefile index f504e45bd..24f60e3f6 100644 --- a/tools/preproc/Makefile +++ b/tools/preproc/Makefile @@ -2,9 +2,11 @@ CXX := g++ CXXFLAGS := -std=c++14 -O2 -Wall -Wno-switch -SRCS := asm_file.cpp charmap.cpp preproc.cpp utf8.cpp +SRCS := asm_file.cpp c_file.cpp charmap.cpp preproc.cpp string_parser.cpp \ + utf8.cpp -HEADERS := asm_file.h char_util.h charmap.h preproc.h utf8.h +HEADERS := asm_file.h c_file.h char_util.h charmap.h preproc.h string_parser.h \ + utf8.h .PHONY: clean diff --git a/tools/preproc/asm_file.cpp b/tools/preproc/asm_file.cpp index b05ebab3f..b843d640b 100644 --- a/tools/preproc/asm_file.cpp +++ b/tools/preproc/asm_file.cpp @@ -24,6 +24,7 @@ #include "asm_file.h" #include "char_util.h" #include "utf8.h" +#include "string_parser.h" AsmFile::AsmFile(std::string filename) : m_filename(filename) { @@ -36,6 +37,9 @@ AsmFile::AsmFile(std::string filename) : m_filename(filename) m_size = std::ftell(fp); + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + m_buffer = new char[m_size + 1]; std::rewind(fp); @@ -246,169 +250,29 @@ std::string AsmFile::ReadPath() return std::string(&m_buffer[startPos], length); } -// Reads a charmap char or escape sequence. -std::string AsmFile::ReadCharOrEscape() -{ - std::string sequence; - - bool isEscape = (m_buffer[m_pos] == '\\'); - - if (isEscape) - { - m_pos++; - - if (m_buffer[m_pos] == '"') - { - sequence = g_charmap->Char('"'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for double quote"); - - return sequence; - } - else if (m_buffer[m_pos] == '\\') - { - sequence = g_charmap->Char('\\'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for backslash"); - - return sequence; - } - } - - unsigned char c = m_buffer[m_pos]; - - if (c == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in UTF-8 string"); - else - RaiseError("unexpected null character in UTF-8 string"); - } - - if (IsAscii(c) && !IsAsciiPrintable(c)) - RaiseError("unexpected character U+%X in UTF-8 string", c); - - UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); - m_pos += unicodeChar.encodingLength; - std::int32_t code = unicodeChar.code; - - if (code == -1) - RaiseError("invalid encoding in UTF-8 string"); - - if (isEscape && code >= 128) - RaiseError("escapes using non-ASCII characters are invalid"); - - sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); - - if (sequence.length() == 0) - { - if (isEscape) - RaiseError("unknown escape '\\%c'", code); - else - RaiseError("unknown character U+%X", code); - } - - return sequence; -} - -// Reads a charmap constant, i.e. "{FOO}". -std::string AsmFile::ReadBracketedConstants() -{ - std::string totalSequence; - - m_pos++; // Assume we're on the left curly bracket. - - while (m_buffer[m_pos] != '}') - { - SkipWhitespace(); - - if (IsIdentifierStartingChar(m_buffer[m_pos])) - { - long startPos = m_pos; - - m_pos++; - - while (IsIdentifierChar(m_buffer[m_pos])) - m_pos++; - - std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); - - if (sequence.length() == 0) - { - m_buffer[m_pos] = 0; - RaiseError("unknown constant '%s'", &m_buffer[startPos]); - } - - totalSequence += sequence; - } - else if (IsAsciiDigit(m_buffer[m_pos])) - { - int value = ReadInteger(255); - - if (value == -1) - RaiseError("integers within curly brackets cannot exceed 255"); - - totalSequence += (char)value; - } - else if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF after left curly bracket"); - else - RaiseError("unexpected null character within curly brackets"); - } - else - { - if (IsAsciiPrintable(m_buffer[m_pos])) - RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); - else - RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); - } - } - - m_pos++; // Go past the right curly bracket. - - return totalSequence; -} - // Reads a charmap string. int AsmFile::ReadString(unsigned char* s) { SkipWhitespace(); - if (m_buffer[m_pos] != '"') - RaiseError("expected UTF-8 string literal"); - - m_pos++; - - int length = 0; + int length; + StringParser stringParser(m_buffer, m_size); - while (m_buffer[m_pos] != '"') + try { - std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); - - for (const char& c : sequence) - { - if (length == kMaxStringLength) - RaiseError("mapped string longer than %d bytes", length); - - s[length++] = c; - } + m_pos += stringParser.ParseString(m_pos, s, length); + } + catch (std::runtime_error e) + { + RaiseError(e.what()); } - - m_pos++; // Go past the right quote. SkipWhitespace(); if (ConsumeComma()) { SkipWhitespace(); - int padLength = ReadInteger(kMaxStringLength); - - if (padLength == -1) - RaiseError("pad length greater than maximum length (%d)", kMaxStringLength); + int padLength = ReadPadLength(); while (length < padLength) { @@ -452,7 +316,7 @@ static int ConvertDigit(char c, int radix) } // Reads an integer. If the integer is greater than maxValue, it returns -1. -int AsmFile::ReadInteger(int maxValue) +int AsmFile::ReadPadLength() { if (!IsAsciiDigit(m_buffer[m_pos])) RaiseError("expected integer"); @@ -472,8 +336,8 @@ int AsmFile::ReadInteger(int maxValue) { n = n * radix + digit; - if (n > (unsigned)maxValue) - return -1; + if (n > kMaxStringLength) + RaiseError("pad length greater than maximum length (%d)", kMaxStringLength); m_pos++; } diff --git a/tools/preproc/asm_file.h b/tools/preproc/asm_file.h index 398c46a36..335dbab4b 100644 --- a/tools/preproc/asm_file.h +++ b/tools/preproc/asm_file.h @@ -56,11 +56,9 @@ private: std::string m_filename; bool ConsumeComma(); - int ReadInteger(int maxValue); + int ReadPadLength(); void RemoveComments(); bool CheckForDirective(std::string name); - std::string ReadCharOrEscape(); - std::string ReadBracketedConstants(); void SkipWhitespace(); void ExpectEmptyRestOfLine(); void ReportDiagnostic(const char* type, const char* format, std::va_list args); diff --git a/tools/preproc/c_file.cpp b/tools/preproc/c_file.cpp new file mode 100644 index 000000000..1e4dea359 --- /dev/null +++ b/tools/preproc/c_file.cpp @@ -0,0 +1,199 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <cstdio> +#include <cstdarg> +#include "preproc.h" +#include "c_file.h" +#include "char_util.h" +#include "utf8.h" +#include "string_parser.h" + +CFile::CFile(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; +} + +CFile::CFile(CFile&& other) : m_filename(std::move(other.m_filename)) +{ + m_buffer = other.m_buffer; + m_pos = other.m_pos; + m_size = other.m_size; + m_lineNum = other.m_lineNum; + + other.m_buffer = nullptr; +} + +CFile::~CFile() +{ + delete[] m_buffer; +} + +void CFile::Preproc() +{ + bool inConcatMode = false; + char stringChar = 0; + + while (m_pos < m_size) + { + if (stringChar) + { + if (m_buffer[m_pos] == stringChar) + { + std::putchar(stringChar); + m_pos++; + stringChar = 0; + } + else if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == stringChar) + { + std::putchar('\\'); + std::putchar(stringChar); + m_pos += 2; + } + else + { + std::putchar(m_buffer[m_pos]); + m_pos++; + } + } + else + { + if (inConcatMode ? m_buffer[m_pos] == '"' + : m_buffer[m_pos] == '_' && m_buffer[m_pos + 1] == '"') + { + if (!inConcatMode) + m_pos++; // skip past underscore + + unsigned char s[kMaxStringLength]; + int length; + StringParser stringParser(m_buffer, m_size); + + try + { + m_pos += stringParser.ParseString(m_pos, s, length); + } + catch (std::runtime_error e) + { + RaiseError(e.what()); + } + + if (!inConcatMode) + { + std::printf("{ "); + } + + inConcatMode = true; + + for (int i = 0; i < length; i++) + printf("0x%02X, ", s[i]); + } + else + { + char c = m_buffer[m_pos++]; + + if (c == '\r') + { + if (m_buffer[m_pos] == '\n') + { + m_pos++; + } + + c = '\n'; + } + + if ((c != ' ' && c != '\t' && c != '\n') && inConcatMode) + { + std::printf("0xFF }"); + inConcatMode = false; + } + + std::putchar(c); + + if (c == '\n') + m_lineNum++; + else if (c == '"') + stringChar = '"'; + else if (m_buffer[m_pos] == '\'') + stringChar = '\''; + } + } + } + + if (inConcatMode) + { + printf("0xFF }"); + RaiseWarning("string at end of file"); + } +} + +// Reports a diagnostic message. +void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + std::vsnprintf(buffer, bufferSize, format, args); + std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); +} + +#define DO_REPORT(type) \ +do \ +{ \ + std::va_list args; \ + va_start(args, format); \ + ReportDiagnostic(type, format, args); \ + va_end(args); \ +} while (0) + +// Reports an error diagnostic and terminates the program. +void CFile::RaiseError(const char* format, ...) +{ + DO_REPORT("error"); + std::exit(1); +} + +// Reports a warning diagnostic. +void CFile::RaiseWarning(const char* format, ...) +{ + DO_REPORT("warning"); +} diff --git a/tools/preproc/c_file.h b/tools/preproc/c_file.h new file mode 100644 index 000000000..b6041cc00 --- /dev/null +++ b/tools/preproc/c_file.h @@ -0,0 +1,50 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef C_FILE_H +#define C_FILE_H + +#include <cstdarg> +#include <cstdint> +#include <string> +#include "preproc.h" + +class CFile +{ +public: + CFile(std::string filename); + CFile(CFile&& other); + CFile(const CFile&) = delete; + ~CFile(); + void Preproc(); + +private: + char* m_buffer; + long m_pos; + long m_size; + long m_lineNum; + std::string m_filename; + + void ReportDiagnostic(const char* type, const char* format, std::va_list args); + void RaiseError(const char* format, ...); + void RaiseWarning(const char* format, ...); +}; + +#endif // C_FILE_H diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp index 573981694..a7bedfe26 100644 --- a/tools/preproc/charmap.cpp +++ b/tools/preproc/charmap.cpp @@ -76,6 +76,9 @@ CharmapReader::CharmapReader(std::string filename) : m_filename(filename) m_size = std::ftell(fp); + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + m_buffer = new char[m_size + 1]; std::rewind(fp); diff --git a/tools/preproc/preproc.cpp b/tools/preproc/preproc.cpp index 4f216f23a..1dd6808c3 100644 --- a/tools/preproc/preproc.cpp +++ b/tools/preproc/preproc.cpp @@ -18,35 +18,29 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#include <string> #include <stack> #include "preproc.h" #include "asm_file.h" +#include "c_file.h" #include "charmap.h" Charmap* g_charmap; -int main(int argc, char **argv) +void PreprocAsmFile(std::string filename) { - if (argc != 3) - { - fprintf(stderr, "Usage: %s ASM_FILE CHARMAP_FILE", argv[0]); - return 1; - } - - g_charmap = new Charmap(argv[2]); - std::stack<AsmFile> stack; - stack.push(AsmFile(argv[1])); + stack.push(AsmFile(filename)); for (;;) { while (stack.top().IsAtEnd()) { stack.pop(); - + if (stack.empty()) - return 0; + return; else stack.top().OutputLocation(); } @@ -84,3 +78,55 @@ int main(int argc, char **argv) } } } + +void PreprocCFile(std::string filename) +{ + CFile cFile(filename); + cFile.Preproc(); +} + +char* GetFileExtension(char* filename) +{ + char* extension = filename; + + while (*extension != 0) + extension++; + + while (extension > filename && *extension != '.') + extension--; + + if (extension == filename) + return nullptr; + + extension++; + + if (*extension == 0) + return nullptr; + + return extension; +} + +int main(int argc, char **argv) +{ + if (argc != 3) + { + fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE", argv[0]); + return 1; + } + + g_charmap = new Charmap(argv[2]); + + char* extension = GetFileExtension(argv[1]); + + if (!extension) + FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]); + + if ((extension[0] == 's') && extension[1] == 0) + PreprocAsmFile(argv[1]); + else if ((extension[0] == 'c' || extension[0] == 'i') && extension[1] == 0) + PreprocCFile(argv[1]); + else + FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension); + + return 0; +} diff --git a/tools/preproc/preproc.h b/tools/preproc/preproc.h index 926748efd..515f64e07 100644 --- a/tools/preproc/preproc.h +++ b/tools/preproc/preproc.h @@ -21,6 +21,8 @@ #ifndef PREPROC_H #define PREPROC_H +#include <cstdio> +#include <cstdlib> #include "charmap.h" #ifdef _MSC_VER @@ -44,7 +46,7 @@ do \ #endif // _MSC_VER const int kMaxPath = 256; -const int kMaxStringLength = 256; +const int kMaxStringLength = 1024; const unsigned long kMaxCharmapSequenceLength = 16; extern Charmap* g_charmap; diff --git a/tools/preproc/string_parser.cpp b/tools/preproc/string_parser.cpp new file mode 100644 index 000000000..b383f2f4f --- /dev/null +++ b/tools/preproc/string_parser.cpp @@ -0,0 +1,355 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <cstdio> +#include <cstdarg> +#include <stdexcept> +#include "preproc.h" +#include "string_parser.h" +#include "char_util.h" +#include "utf8.h" + +// Reads a charmap char or escape sequence. +std::string StringParser::ReadCharOrEscape() +{ + std::string sequence; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + + if (m_buffer[m_pos] == '"') + { + sequence = g_charmap->Char('"'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for double quote"); + + return sequence; + } + else if (m_buffer[m_pos] == '\\') + { + sequence = g_charmap->Char('\\'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for backslash"); + + return sequence; + } + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 string"); + else + RaiseError("unexpected null character in UTF-8 string"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 string", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + m_pos += unicodeChar.encodingLength; + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 string"); + + if (isEscape && code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); + + if (sequence.length() == 0) + { + if (isEscape) + RaiseError("unknown escape '\\%c'", code); + else + RaiseError("unknown character U+%X", code); + } + + return sequence; +} + +// Reads a charmap constant, i.e. "{FOO}". +std::string StringParser::ReadBracketedConstants() +{ + std::string totalSequence; + + m_pos++; // Assume we're on the left curly bracket. + + while (m_buffer[m_pos] != '}') + { + SkipWhitespace(); + + if (IsIdentifierStartingChar(m_buffer[m_pos])) + { + long startPos = m_pos; + + m_pos++; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); + + if (sequence.length() == 0) + { + m_buffer[m_pos] = 0; + RaiseError("unknown constant '%s'", &m_buffer[startPos]); + } + + totalSequence += sequence; + } + else if (IsAsciiDigit(m_buffer[m_pos])) + { + Integer integer = ReadInteger(); + + switch (integer.size) + { + case 1: + totalSequence += (unsigned char)integer.value; + break; + case 2: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + break; + case 4: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + totalSequence += (unsigned char)(integer.value >> 16); + totalSequence += (unsigned char)(integer.value >> 24); + break; + } + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF after left curly bracket"); + else + RaiseError("unexpected null character within curly brackets"); + } + else + { + if (IsAsciiPrintable(m_buffer[m_pos])) + RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); + else + RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); + } + } + + m_pos++; // Go past the right curly bracket. + + return totalSequence; +} + +// Reads a charmap string. +int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength) +{ + m_pos = srcPos; + + if (m_buffer[m_pos] != '"') + RaiseError("expected UTF-8 string literal"); + + long start = m_pos; + + m_pos++; + + destLength = 0; + + while (m_buffer[m_pos] != '"') + { + std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); + + for (const char& c : sequence) + { + if (destLength == kMaxStringLength) + RaiseError("mapped string longer than %d bytes", destLength); + + dest[destLength++] = c; + } + } + + m_pos++; // Go past the right quote. + + return m_pos - start; +} + +void StringParser::RaiseError(const char* format, ...) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + throw std::runtime_error(buffer); +} + +// Converts digit character to numerical value. +static int ConvertDigit(char c, int radix) +{ + int digit; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + return -1; + + return (digit < radix) ? digit : -1; +} + +void StringParser::SkipRestOfInteger(int radix) +{ + while (ConvertDigit(m_buffer[m_pos], radix) != -1) + m_pos++; +} + +StringParser::Integer StringParser::ReadDecimal() +{ + const int radix = 10; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int size; + + if (m_buffer[m_pos] == 'H') + { + if (n >= 0x10000) + { + RaiseError("%lu is too large to be a halfword", (unsigned long)n); + } + + size = 2; + m_pos++; + } + else if (m_buffer[m_pos] == 'W') + { + size = 4; + m_pos++; + } + else + { + if (n >= 0x10000) + size = 4; + else if (n >= 0x100) + size = 2; + else + size = 1; + } + + return{ static_cast<std::uint32_t>(n), size }; +} + +StringParser::Integer StringParser::ReadHex() +{ + const int radix = 16; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int length = m_pos - startPos; + int size = 0; + + switch (length) + { + case 2: + size = 1; + break; + case 4: + size = 2; + break; + case 8: + size = 4; + break; + default: + { + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str()); + } + } + + return{ static_cast<std::uint32_t>(n), size }; +} + +StringParser::Integer StringParser::ReadInteger() +{ + if (!IsAsciiDigit(m_buffer[m_pos])) + RaiseError("expected integer"); + + if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') + { + m_pos += 2; + return ReadHex(); + } + + return ReadDecimal(); +} + +// Skips tabs and spaces. +void StringParser::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} diff --git a/tools/preproc/string_parser.h b/tools/preproc/string_parser.h new file mode 100644 index 000000000..abd2bfe9a --- /dev/null +++ b/tools/preproc/string_parser.h @@ -0,0 +1,55 @@ +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef STRING_PARSER_H +#define STRING_PARSER_H + +#include <cstdint> +#include <string> +#include "preproc.h" + +class StringParser +{ +public: + StringParser(char* buffer, long size) : m_buffer(buffer), m_size(size), m_pos(0) {} + int ParseString(long srcPos, unsigned char* dest, int &destLength); + +private: + struct Integer + { + std::uint32_t value; + int size; + }; + + char* m_buffer; + long m_size; + long m_pos; + + Integer ReadInteger(); + Integer ReadDecimal(); + Integer ReadHex(); + std::string ReadCharOrEscape(); + std::string ReadBracketedConstants(); + void SkipWhitespace(); + void SkipRestOfInteger(int radix); + void RaiseError(const char* format, ...); +}; + +#endif // STRING_PARSER_H |