From 8237e29a164211eb2ec4cd161eb4183cc1947fee Mon Sep 17 00:00:00 2001 From: Cheng Hann Gan Date: Thu, 9 Sep 2021 19:22:48 -0400 Subject: Defined more in-dungeon structs and enums (#53) * Defined DungeonEntity * Rename EntityType enums * Revert EntityType rename * Defined more in-dungeon structs and enums * Added more dungeon global structs/enums * Prefixed dungeonGlobalData with g * Fixed compile errors * Removed some CRLFs * Fixed compile after merge * Revert Makefile * Rename DungeonEntityData.entityType Co-authored-by: Seth Barberee * Renamed symbols per PR comments Co-authored-by: Cheng Hann Gan Co-authored-by: Seth Barberee --- tools/preproc/string_parser.cpp | 710 ++++++++++++++++++++-------------------- 1 file changed, 355 insertions(+), 355 deletions(-) (limited to 'tools/preproc/string_parser.cpp') diff --git a/tools/preproc/string_parser.cpp b/tools/preproc/string_parser.cpp index b0a4875..dd5196a 100644 --- a/tools/preproc/string_parser.cpp +++ b/tools/preproc/string_parser.cpp @@ -1,355 +1,355 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include -#include "preproc.h" -#include "string_parser.h" -#include "char_util.h" -#include "utf8.h" - -// Reads a charmap char or escape sequence. -std::string StringParser::ReadCharOrEscape() -{ - std::string sequence; - - bool isEscape = (m_buffer[m_pos] == '\\'); - - if (isEscape) - { - m_pos++; - - if (m_buffer[m_pos] == '"') - { - sequence = g_charmap->Char('"'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for double quote"); - - return sequence; - } - else if (m_buffer[m_pos] == '\\') - { - sequence = g_charmap->Char('\\'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for backslash"); - - return sequence; - } - } - - unsigned char c = m_buffer[m_pos]; - - if (c == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in UTF-8 string"); - else - RaiseError("unexpected null character in UTF-8 string"); - } - - if (IsAscii(c) && !IsAsciiPrintable(c)) - RaiseError("unexpected character U+%X in UTF-8 string", c); - - UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); - m_pos += unicodeChar.encodingLength; - std::int32_t code = unicodeChar.code; - - if (code == -1) - RaiseError("invalid encoding in UTF-8 string"); - - if (isEscape && code >= 128) - RaiseError("escapes using non-ASCII characters are invalid"); - - sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); - - if (sequence.length() == 0) - { - if (isEscape) - RaiseError("unknown escape '\\%c'", code); - else - RaiseError("unknown character U+%X", code); - } - - return sequence; -} - -// Reads a charmap constant, i.e. "{FOO}". -std::string StringParser::ReadBracketedConstants() -{ - std::string totalSequence; - - m_pos++; // Assume we're on the left curly bracket. - - while (m_buffer[m_pos] != '}') - { - SkipWhitespace(); - - if (IsIdentifierStartingChar(m_buffer[m_pos])) - { - long startPos = m_pos; - - m_pos++; - - while (IsIdentifierChar(m_buffer[m_pos])) - m_pos++; - - std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); - - if (sequence.length() == 0) - { - m_buffer[m_pos] = 0; - RaiseError("unknown constant '%s'", &m_buffer[startPos]); - } - - totalSequence += sequence; - } - else if (IsAsciiDigit(m_buffer[m_pos])) - { - Integer integer = ReadInteger(); - - switch (integer.size) - { - case 1: - totalSequence += (unsigned char)integer.value; - break; - case 2: - totalSequence += (unsigned char)integer.value; - totalSequence += (unsigned char)(integer.value >> 8); - break; - case 4: - totalSequence += (unsigned char)integer.value; - totalSequence += (unsigned char)(integer.value >> 8); - totalSequence += (unsigned char)(integer.value >> 16); - totalSequence += (unsigned char)(integer.value >> 24); - break; - } - } - else if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF after left curly bracket"); - else - RaiseError("unexpected null character within curly brackets"); - } - else - { - if (IsAsciiPrintable(m_buffer[m_pos])) - RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); - else - RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); - } - } - - m_pos++; // Go past the right curly bracket. - - return totalSequence; -} - -// Reads a charmap string. -int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength) -{ - m_pos = srcPos; - - if (m_buffer[m_pos] != '"') - RaiseError("expected UTF-8 string literal"); - - long start = m_pos; - - m_pos++; - - destLength = 0; - - while (m_buffer[m_pos] != '"') - { - std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); - - for (const char& c : sequence) - { - if (destLength == kMaxStringLength) - RaiseError("mapped string longer than %d bytes", kMaxStringLength); - - dest[destLength++] = c; - } - } - - m_pos++; // Go past the right quote. - - return m_pos - start; -} - -void StringParser::RaiseError(const char* format, ...) -{ - const int bufferSize = 1024; - char buffer[bufferSize]; - - std::va_list args; - va_start(args, format); - std::vsnprintf(buffer, bufferSize, format, args); - va_end(args); - - throw std::runtime_error(buffer); -} - -// Converts digit character to numerical value. -static int ConvertDigit(char c, int radix) -{ - int digit; - - if (c >= '0' && c <= '9') - digit = c - '0'; - else if (c >= 'A' && c <= 'F') - digit = 10 + c - 'A'; - else if (c >= 'a' && c <= 'f') - digit = 10 + c - 'a'; - else - return -1; - - return (digit < radix) ? digit : -1; -} - -void StringParser::SkipRestOfInteger(int radix) -{ - while (ConvertDigit(m_buffer[m_pos], radix) != -1) - m_pos++; -} - -StringParser::Integer StringParser::ReadDecimal() -{ - const int radix = 10; - std::uint64_t n = 0; - int digit; - std::uint64_t max = UINT32_MAX; - long startPos = m_pos; - - while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) - { - n = n * radix + digit; - - if (n >= max) - { - SkipRestOfInteger(radix); - - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); - } - - m_pos++; - } - - int size; - - if (m_buffer[m_pos] == 'H') - { - if (n >= 0x10000) - { - RaiseError("%lu is too large to be a halfword", (unsigned long)n); - } - - size = 2; - m_pos++; - } - else if (m_buffer[m_pos] == 'W') - { - size = 4; - m_pos++; - } - else - { - if (n >= 0x10000) - size = 4; - else if (n >= 0x100) - size = 2; - else - size = 1; - } - - return{ static_cast(n), size }; -} - -StringParser::Integer StringParser::ReadHex() -{ - const int radix = 16; - std::uint64_t n = 0; - int digit; - std::uint64_t max = UINT32_MAX; - long startPos = m_pos; - - while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) - { - n = n * radix + digit; - - if (n >= max) - { - SkipRestOfInteger(radix); - - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); - } - - m_pos++; - } - - int length = m_pos - startPos; - int size = 0; - - switch (length) - { - case 2: - size = 1; - break; - case 4: - size = 2; - break; - case 8: - size = 4; - break; - default: - { - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str()); - } - } - - return{ static_cast(n), size }; -} - -StringParser::Integer StringParser::ReadInteger() -{ - if (!IsAsciiDigit(m_buffer[m_pos])) - RaiseError("expected integer"); - - if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') - { - m_pos += 2; - return ReadHex(); - } - - return ReadDecimal(); -} - -// Skips tabs and spaces. -void StringParser::SkipWhitespace() -{ - while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') - m_pos++; -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include "preproc.h" +#include "string_parser.h" +#include "char_util.h" +#include "utf8.h" + +// Reads a charmap char or escape sequence. +std::string StringParser::ReadCharOrEscape() +{ + std::string sequence; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + + if (m_buffer[m_pos] == '"') + { + sequence = g_charmap->Char('"'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for double quote"); + + return sequence; + } + else if (m_buffer[m_pos] == '\\') + { + sequence = g_charmap->Char('\\'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for backslash"); + + return sequence; + } + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 string"); + else + RaiseError("unexpected null character in UTF-8 string"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 string", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + m_pos += unicodeChar.encodingLength; + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 string"); + + if (isEscape && code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); + + if (sequence.length() == 0) + { + if (isEscape) + RaiseError("unknown escape '\\%c'", code); + else + RaiseError("unknown character U+%X", code); + } + + return sequence; +} + +// Reads a charmap constant, i.e. "{FOO}". +std::string StringParser::ReadBracketedConstants() +{ + std::string totalSequence; + + m_pos++; // Assume we're on the left curly bracket. + + while (m_buffer[m_pos] != '}') + { + SkipWhitespace(); + + if (IsIdentifierStartingChar(m_buffer[m_pos])) + { + long startPos = m_pos; + + m_pos++; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); + + if (sequence.length() == 0) + { + m_buffer[m_pos] = 0; + RaiseError("unknown constant '%s'", &m_buffer[startPos]); + } + + totalSequence += sequence; + } + else if (IsAsciiDigit(m_buffer[m_pos])) + { + Integer integer = ReadInteger(); + + switch (integer.size) + { + case 1: + totalSequence += (unsigned char)integer.value; + break; + case 2: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + break; + case 4: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + totalSequence += (unsigned char)(integer.value >> 16); + totalSequence += (unsigned char)(integer.value >> 24); + break; + } + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF after left curly bracket"); + else + RaiseError("unexpected null character within curly brackets"); + } + else + { + if (IsAsciiPrintable(m_buffer[m_pos])) + RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); + else + RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); + } + } + + m_pos++; // Go past the right curly bracket. + + return totalSequence; +} + +// Reads a charmap string. +int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength) +{ + m_pos = srcPos; + + if (m_buffer[m_pos] != '"') + RaiseError("expected UTF-8 string literal"); + + long start = m_pos; + + m_pos++; + + destLength = 0; + + while (m_buffer[m_pos] != '"') + { + std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); + + for (const char& c : sequence) + { + if (destLength == kMaxStringLength) + RaiseError("mapped string longer than %d bytes", kMaxStringLength); + + dest[destLength++] = c; + } + } + + m_pos++; // Go past the right quote. + + return m_pos - start; +} + +void StringParser::RaiseError(const char* format, ...) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + throw std::runtime_error(buffer); +} + +// Converts digit character to numerical value. +static int ConvertDigit(char c, int radix) +{ + int digit; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + return -1; + + return (digit < radix) ? digit : -1; +} + +void StringParser::SkipRestOfInteger(int radix) +{ + while (ConvertDigit(m_buffer[m_pos], radix) != -1) + m_pos++; +} + +StringParser::Integer StringParser::ReadDecimal() +{ + const int radix = 10; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int size; + + if (m_buffer[m_pos] == 'H') + { + if (n >= 0x10000) + { + RaiseError("%lu is too large to be a halfword", (unsigned long)n); + } + + size = 2; + m_pos++; + } + else if (m_buffer[m_pos] == 'W') + { + size = 4; + m_pos++; + } + else + { + if (n >= 0x10000) + size = 4; + else if (n >= 0x100) + size = 2; + else + size = 1; + } + + return{ static_cast(n), size }; +} + +StringParser::Integer StringParser::ReadHex() +{ + const int radix = 16; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int length = m_pos - startPos; + int size = 0; + + switch (length) + { + case 2: + size = 1; + break; + case 4: + size = 2; + break; + case 8: + size = 4; + break; + default: + { + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str()); + } + } + + return{ static_cast(n), size }; +} + +StringParser::Integer StringParser::ReadInteger() +{ + if (!IsAsciiDigit(m_buffer[m_pos])) + RaiseError("expected integer"); + + if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') + { + m_pos += 2; + return ReadHex(); + } + + return ReadDecimal(); +} + +// Skips tabs and spaces. +void StringParser::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} -- cgit v1.2.3