From 8237e29a164211eb2ec4cd161eb4183cc1947fee Mon Sep 17 00:00:00 2001 From: Cheng Hann Gan Date: Thu, 9 Sep 2021 19:22:48 -0400 Subject: Defined more in-dungeon structs and enums (#53) * Defined DungeonEntity * Rename EntityType enums * Revert EntityType rename * Defined more in-dungeon structs and enums * Added more dungeon global structs/enums * Prefixed dungeonGlobalData with g * Fixed compile errors * Removed some CRLFs * Fixed compile after merge * Revert Makefile * Rename DungeonEntityData.entityType Co-authored-by: Seth Barberee * Renamed symbols per PR comments Co-authored-by: Cheng Hann Gan Co-authored-by: Seth Barberee --- tools/preproc/.gitignore | 2 +- tools/preproc/asm_file.cpp | 1064 +++++++++++++++++++-------------------- tools/preproc/c_file.cpp | 860 +++++++++++++++---------------- tools/preproc/charmap.cpp | 816 +++++++++++++++--------------- tools/preproc/preproc.cpp | 312 ++++++------ tools/preproc/string_parser.cpp | 710 +++++++++++++------------- tools/preproc/utf8.cpp | 184 +++---- 7 files changed, 1974 insertions(+), 1974 deletions(-) (limited to 'tools/preproc') diff --git a/tools/preproc/.gitignore b/tools/preproc/.gitignore index afa6b22..eb34708 100644 --- a/tools/preproc/.gitignore +++ b/tools/preproc/.gitignore @@ -1 +1 @@ -preproc +preproc diff --git a/tools/preproc/asm_file.cpp b/tools/preproc/asm_file.cpp index ce0bf31..7756cad 100644 --- a/tools/preproc/asm_file.cpp +++ b/tools/preproc/asm_file.cpp @@ -1,532 +1,532 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include -#include "preproc.h" -#include "asm_file.h" -#include "char_util.h" -#include "utf8.h" -#include "string_parser.h" - -AsmFile::AsmFile(std::string filename) : m_filename(filename) -{ - FILE *fp = std::fopen(filename.c_str(), "rb"); - - if (fp == NULL) - FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); - - std::fseek(fp, 0, SEEK_END); - - m_size = std::ftell(fp); - - if (m_size < 0) - FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); - - m_buffer = new char[m_size + 1]; - - std::rewind(fp); - - if (std::fread(m_buffer, m_size, 1, fp) != 1) - FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); - - m_buffer[m_size] = 0; - - std::fclose(fp); - - m_pos = 0; - m_lineNum = 1; - m_lineStart = 0; - - RemoveComments(); -} - -AsmFile::AsmFile(AsmFile&& other) : m_filename(std::move(other.m_filename)) -{ - m_buffer = other.m_buffer; - m_pos = other.m_pos; - m_size = other.m_size; - m_lineNum = other.m_lineNum; - m_lineStart = other.m_lineStart; - - other.m_buffer = nullptr; -} - -AsmFile::~AsmFile() -{ - delete[] m_buffer; -} - -// Removes comments to simplify further processing. -// It stops upon encountering a null character, -// which may or may not be the end of file marker. -// If it's not, the error will be caught later. -void AsmFile::RemoveComments() -{ - long pos = 0; - char stringChar = 0; - - for (;;) - { - if (m_buffer[pos] == 0) - return; - - if (stringChar != 0) - { - if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar) - { - pos += 2; - } - else - { - if (m_buffer[pos] == stringChar) - stringChar = 0; - pos++; - } - } - else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\')) - { - while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) - m_buffer[pos++] = ' '; - } - else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*') - { - m_buffer[pos++] = ' '; - m_buffer[pos++] = ' '; - - for (;;) - { - if (m_buffer[pos] == 0) - return; - - if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/') - { - m_buffer[pos++] = ' '; - m_buffer[pos++] = ' '; - break; - } - else - { - if (m_buffer[pos] != '\n') - m_buffer[pos] = ' '; - pos++; - } - } - } - else - { - if (m_buffer[pos] == '"' || m_buffer[pos] == '\'') - stringChar = m_buffer[pos]; - pos++; - } - } -} - -// Checks if we're at a particular directive and if so, consumes it. -// Returns whether the directive was found. -bool AsmFile::CheckForDirective(std::string name) -{ - long i; - long length = static_cast(name.length()); - - for (i = 0; i < length && m_pos + i < m_size; i++) - if (name[i] != m_buffer[m_pos + i]) - return false; - - if (i < length) - return false; - - m_pos += length; - - return true; -} - -// Checks if we're at a known directive and if so, consumes it. -// Returns which directive was found. -Directive AsmFile::GetDirective() -{ - SkipWhitespace(); - - if (CheckForDirective(".include")) - return Directive::Include; - else if (CheckForDirective(".string")) - return Directive::String; - else if (CheckForDirective(".braille")) - return Directive::Braille; - else - return Directive::Unknown; -} - -// Checks if we're at label that ends with '::'. -// Returns the name if so and an empty string if not. -std::string AsmFile::GetGlobalLabel() -{ - long start = m_pos; - long pos = m_pos; - - if (IsIdentifierStartingChar(m_buffer[pos])) - { - pos++; - - while (IsIdentifierChar(m_buffer[pos])) - pos++; - } - - if (m_buffer[pos] == ':' && m_buffer[pos + 1] == ':') - { - m_pos = pos + 2; - ExpectEmptyRestOfLine(); - return std::string(&m_buffer[start], pos - start); - } - - return std::string(); -} - -// Skips tabs and spaces. -void AsmFile::SkipWhitespace() -{ - while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') - m_pos++; -} - -// Reads include path. -std::string AsmFile::ReadPath() -{ - SkipWhitespace(); - - if (m_buffer[m_pos] != '"') - RaiseError("expected file path"); - - m_pos++; - - int length = 0; - long startPos = m_pos; - - while (m_buffer[m_pos] != '"') - { - unsigned char c = m_buffer[m_pos++]; - - if (c == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in include string"); - else - RaiseError("unexpected null character in include string"); - } - - if (!IsAsciiPrintable(c)) - RaiseError("unexpected character '\\x%02X' in include string", c); - - // Don't bother allowing any escape sequences. - if (c == '\\') - { - c = m_buffer[m_pos]; - RaiseError("unexpected escape '\\%c' in include string", c); - } - - length++; - - if (length > kMaxPath) - RaiseError("path is too long"); - } - - m_pos++; // Go past the right quote. - - ExpectEmptyRestOfLine(); - - return std::string(&m_buffer[startPos], length); -} - -// Reads a charmap string. -int AsmFile::ReadString(unsigned char* s) -{ - SkipWhitespace(); - - int length; - StringParser stringParser(m_buffer, m_size); - - try - { - m_pos += stringParser.ParseString(m_pos, s, length); - } - catch (std::runtime_error& e) - { - RaiseError(e.what()); - } - - SkipWhitespace(); - - if (ConsumeComma()) - { - SkipWhitespace(); - int padLength = ReadPadLength(); - - while (length < padLength) - { - s[length++] = 0; - } - } - - ExpectEmptyRestOfLine(); - - return length; -} - -int AsmFile::ReadBraille(unsigned char* s) -{ - static std::map encoding = - { - { 'A', 0x01 }, - { 'B', 0x05 }, - { 'C', 0x03 }, - { 'D', 0x0B }, - { 'E', 0x09 }, - { 'F', 0x07 }, - { 'G', 0x0F }, - { 'H', 0x0D }, - { 'I', 0x06 }, - { 'J', 0x0E }, - { 'K', 0x11 }, - { 'L', 0x15 }, - { 'M', 0x13 }, - { 'N', 0x1B }, - { 'O', 0x19 }, - { 'P', 0x17 }, - { 'Q', 0x1F }, - { 'R', 0x1D }, - { 'S', 0x16 }, - { 'T', 0x1E }, - { 'U', 0x31 }, - { 'V', 0x35 }, - { 'W', 0x2E }, - { 'X', 0x33 }, - { 'Y', 0x3B }, - { 'Z', 0x39 }, - { ' ', 0x00 }, - { ',', 0x04 }, - { '.', 0x2C }, - { '$', 0xFF }, - }; - - SkipWhitespace(); - - int length = 0; - - if (m_buffer[m_pos] != '"') - RaiseError("expected braille string literal"); - - m_pos++; - - while (m_buffer[m_pos] != '"') - { - if (length == kMaxStringLength) - RaiseError("mapped string longer than %d bytes", kMaxStringLength); - - if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == 'n') - { - s[length++] = 0xFE; - m_pos += 2; - } - else - { - char c = m_buffer[m_pos]; - - if (encoding.count(c) == 0) - { - if (IsAsciiPrintable(c)) - RaiseError("character '%c' not valid in braille string", m_buffer[m_pos]); - else - RaiseError("character '\\x%02X' not valid in braille string", m_buffer[m_pos]); - } - - s[length++] = encoding[c]; - m_pos++; - } - } - - m_pos++; // Go past the right quote. - - ExpectEmptyRestOfLine(); - - return length; -} - -// If we're at a comma, consumes it. -// Returns whether a comma was found. -bool AsmFile::ConsumeComma() -{ - if (m_buffer[m_pos] == ',') - { - m_pos++; - return true; - } - - return false; -} - -// Converts digit character to numerical value. -static int ConvertDigit(char c, int radix) -{ - int digit; - - if (c >= '0' && c <= '9') - digit = c - '0'; - else if (c >= 'A' && c <= 'F') - digit = 10 + c - 'A'; - else if (c >= 'a' && c <= 'f') - digit = 10 + c - 'a'; - else - return -1; - - return (digit < radix) ? digit : -1; -} - -// Reads an integer. If the integer is greater than maxValue, it returns -1. -int AsmFile::ReadPadLength() -{ - if (!IsAsciiDigit(m_buffer[m_pos])) - RaiseError("expected integer"); - - int radix = 10; - - if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') - { - radix = 16; - m_pos += 2; - } - - unsigned n = 0; - int digit; - - while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) - { - n = n * radix + digit; - - if (n > kMaxStringLength) - RaiseError("pad length greater than maximum length (%d)", kMaxStringLength); - - m_pos++; - } - - return n; -} - -// Outputs the current line and moves to the next one. -void AsmFile::OutputLine() -{ - while (m_buffer[m_pos] != '\n' && m_buffer[m_pos] != 0) - m_pos++; - - if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - { - RaiseWarning("file doesn't end with newline"); - puts(&m_buffer[m_lineStart]); - } - else - { - RaiseError("unexpected null character"); - } - } - else - { - m_buffer[m_pos] = 0; - puts(&m_buffer[m_lineStart]); - m_buffer[m_pos] = '\n'; - m_pos++; - m_lineStart = m_pos; - m_lineNum++; - } -} - -// Asserts that the rest of the line is empty and moves to the next one. -void AsmFile::ExpectEmptyRestOfLine() -{ - SkipWhitespace(); - - if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - RaiseWarning("file doesn't end with newline"); - else - RaiseError("unexpected null character"); - } - else if (m_buffer[m_pos] == '\n') - { - m_pos++; - m_lineStart = m_pos; - m_lineNum++; - } - else if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n') - { - m_pos += 2; - m_lineStart = m_pos; - m_lineNum++; - } - else - { - RaiseError("junk at end of line"); - } -} - -// Checks if we're at the end of the file. -bool AsmFile::IsAtEnd() -{ - return (m_pos >= m_size); -} - -// Output the current location to set gas's logical file and line numbers. -void AsmFile::OutputLocation() -{ - std::printf("# %ld \"%s\"\n", m_lineNum, m_filename.c_str()); -} - -// Reports a diagnostic message. -void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) -{ - const int bufferSize = 1024; - char buffer[bufferSize]; - std::vsnprintf(buffer, bufferSize, format, args); - std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); -} - -#define DO_REPORT(type) \ -do \ -{ \ - std::va_list args; \ - va_start(args, format); \ - ReportDiagnostic(type, format, args); \ - va_end(args); \ -} while (0) - -// Reports an error diagnostic and terminates the program. -void AsmFile::RaiseError(const char* format, ...) -{ - DO_REPORT("error"); - std::exit(1); -} - -// Reports a warning diagnostic. -void AsmFile::RaiseWarning(const char* format, ...) -{ - DO_REPORT("warning"); -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include "preproc.h" +#include "asm_file.h" +#include "char_util.h" +#include "utf8.h" +#include "string_parser.h" + +AsmFile::AsmFile(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; + m_lineStart = 0; + + RemoveComments(); +} + +AsmFile::AsmFile(AsmFile&& other) : m_filename(std::move(other.m_filename)) +{ + m_buffer = other.m_buffer; + m_pos = other.m_pos; + m_size = other.m_size; + m_lineNum = other.m_lineNum; + m_lineStart = other.m_lineStart; + + other.m_buffer = nullptr; +} + +AsmFile::~AsmFile() +{ + delete[] m_buffer; +} + +// Removes comments to simplify further processing. +// It stops upon encountering a null character, +// which may or may not be the end of file marker. +// If it's not, the error will be caught later. +void AsmFile::RemoveComments() +{ + long pos = 0; + char stringChar = 0; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (stringChar != 0) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == stringChar) + { + pos += 2; + } + else + { + if (m_buffer[pos] == stringChar) + stringChar = 0; + pos++; + } + } + else if (m_buffer[pos] == '@' && (pos == 0 || m_buffer[pos - 1] != '\\')) + { + while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) + m_buffer[pos++] = ' '; + } + else if (m_buffer[pos] == '/' && m_buffer[pos + 1] == '*') + { + m_buffer[pos++] = ' '; + m_buffer[pos++] = ' '; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (m_buffer[pos] == '*' && m_buffer[pos + 1] == '/') + { + m_buffer[pos++] = ' '; + m_buffer[pos++] = ' '; + break; + } + else + { + if (m_buffer[pos] != '\n') + m_buffer[pos] = ' '; + pos++; + } + } + } + else + { + if (m_buffer[pos] == '"' || m_buffer[pos] == '\'') + stringChar = m_buffer[pos]; + pos++; + } + } +} + +// Checks if we're at a particular directive and if so, consumes it. +// Returns whether the directive was found. +bool AsmFile::CheckForDirective(std::string name) +{ + long i; + long length = static_cast(name.length()); + + for (i = 0; i < length && m_pos + i < m_size; i++) + if (name[i] != m_buffer[m_pos + i]) + return false; + + if (i < length) + return false; + + m_pos += length; + + return true; +} + +// Checks if we're at a known directive and if so, consumes it. +// Returns which directive was found. +Directive AsmFile::GetDirective() +{ + SkipWhitespace(); + + if (CheckForDirective(".include")) + return Directive::Include; + else if (CheckForDirective(".string")) + return Directive::String; + else if (CheckForDirective(".braille")) + return Directive::Braille; + else + return Directive::Unknown; +} + +// Checks if we're at label that ends with '::'. +// Returns the name if so and an empty string if not. +std::string AsmFile::GetGlobalLabel() +{ + long start = m_pos; + long pos = m_pos; + + if (IsIdentifierStartingChar(m_buffer[pos])) + { + pos++; + + while (IsIdentifierChar(m_buffer[pos])) + pos++; + } + + if (m_buffer[pos] == ':' && m_buffer[pos + 1] == ':') + { + m_pos = pos + 2; + ExpectEmptyRestOfLine(); + return std::string(&m_buffer[start], pos - start); + } + + return std::string(); +} + +// Skips tabs and spaces. +void AsmFile::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} + +// Reads include path. +std::string AsmFile::ReadPath() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '"') + RaiseError("expected file path"); + + m_pos++; + + int length = 0; + long startPos = m_pos; + + while (m_buffer[m_pos] != '"') + { + unsigned char c = m_buffer[m_pos++]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in include string"); + else + RaiseError("unexpected null character in include string"); + } + + if (!IsAsciiPrintable(c)) + RaiseError("unexpected character '\\x%02X' in include string", c); + + // Don't bother allowing any escape sequences. + if (c == '\\') + { + c = m_buffer[m_pos]; + RaiseError("unexpected escape '\\%c' in include string", c); + } + + length++; + + if (length > kMaxPath) + RaiseError("path is too long"); + } + + m_pos++; // Go past the right quote. + + ExpectEmptyRestOfLine(); + + return std::string(&m_buffer[startPos], length); +} + +// Reads a charmap string. +int AsmFile::ReadString(unsigned char* s) +{ + SkipWhitespace(); + + int length; + StringParser stringParser(m_buffer, m_size); + + try + { + m_pos += stringParser.ParseString(m_pos, s, length); + } + catch (std::runtime_error& e) + { + RaiseError(e.what()); + } + + SkipWhitespace(); + + if (ConsumeComma()) + { + SkipWhitespace(); + int padLength = ReadPadLength(); + + while (length < padLength) + { + s[length++] = 0; + } + } + + ExpectEmptyRestOfLine(); + + return length; +} + +int AsmFile::ReadBraille(unsigned char* s) +{ + static std::map encoding = + { + { 'A', 0x01 }, + { 'B', 0x05 }, + { 'C', 0x03 }, + { 'D', 0x0B }, + { 'E', 0x09 }, + { 'F', 0x07 }, + { 'G', 0x0F }, + { 'H', 0x0D }, + { 'I', 0x06 }, + { 'J', 0x0E }, + { 'K', 0x11 }, + { 'L', 0x15 }, + { 'M', 0x13 }, + { 'N', 0x1B }, + { 'O', 0x19 }, + { 'P', 0x17 }, + { 'Q', 0x1F }, + { 'R', 0x1D }, + { 'S', 0x16 }, + { 'T', 0x1E }, + { 'U', 0x31 }, + { 'V', 0x35 }, + { 'W', 0x2E }, + { 'X', 0x33 }, + { 'Y', 0x3B }, + { 'Z', 0x39 }, + { ' ', 0x00 }, + { ',', 0x04 }, + { '.', 0x2C }, + { '$', 0xFF }, + }; + + SkipWhitespace(); + + int length = 0; + + if (m_buffer[m_pos] != '"') + RaiseError("expected braille string literal"); + + m_pos++; + + while (m_buffer[m_pos] != '"') + { + if (length == kMaxStringLength) + RaiseError("mapped string longer than %d bytes", kMaxStringLength); + + if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == 'n') + { + s[length++] = 0xFE; + m_pos += 2; + } + else + { + char c = m_buffer[m_pos]; + + if (encoding.count(c) == 0) + { + if (IsAsciiPrintable(c)) + RaiseError("character '%c' not valid in braille string", m_buffer[m_pos]); + else + RaiseError("character '\\x%02X' not valid in braille string", m_buffer[m_pos]); + } + + s[length++] = encoding[c]; + m_pos++; + } + } + + m_pos++; // Go past the right quote. + + ExpectEmptyRestOfLine(); + + return length; +} + +// If we're at a comma, consumes it. +// Returns whether a comma was found. +bool AsmFile::ConsumeComma() +{ + if (m_buffer[m_pos] == ',') + { + m_pos++; + return true; + } + + return false; +} + +// Converts digit character to numerical value. +static int ConvertDigit(char c, int radix) +{ + int digit; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + return -1; + + return (digit < radix) ? digit : -1; +} + +// Reads an integer. If the integer is greater than maxValue, it returns -1. +int AsmFile::ReadPadLength() +{ + if (!IsAsciiDigit(m_buffer[m_pos])) + RaiseError("expected integer"); + + int radix = 10; + + if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') + { + radix = 16; + m_pos += 2; + } + + unsigned n = 0; + int digit; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n > kMaxStringLength) + RaiseError("pad length greater than maximum length (%d)", kMaxStringLength); + + m_pos++; + } + + return n; +} + +// Outputs the current line and moves to the next one. +void AsmFile::OutputLine() +{ + while (m_buffer[m_pos] != '\n' && m_buffer[m_pos] != 0) + m_pos++; + + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + { + RaiseWarning("file doesn't end with newline"); + puts(&m_buffer[m_lineStart]); + } + else + { + RaiseError("unexpected null character"); + } + } + else + { + m_buffer[m_pos] = 0; + puts(&m_buffer[m_lineStart]); + m_buffer[m_pos] = '\n'; + m_pos++; + m_lineStart = m_pos; + m_lineNum++; + } +} + +// Asserts that the rest of the line is empty and moves to the next one. +void AsmFile::ExpectEmptyRestOfLine() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseWarning("file doesn't end with newline"); + else + RaiseError("unexpected null character"); + } + else if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineStart = m_pos; + m_lineNum++; + } + else if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n') + { + m_pos += 2; + m_lineStart = m_pos; + m_lineNum++; + } + else + { + RaiseError("junk at end of line"); + } +} + +// Checks if we're at the end of the file. +bool AsmFile::IsAtEnd() +{ + return (m_pos >= m_size); +} + +// Output the current location to set gas's logical file and line numbers. +void AsmFile::OutputLocation() +{ + std::printf("# %ld \"%s\"\n", m_lineNum, m_filename.c_str()); +} + +// Reports a diagnostic message. +void AsmFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + std::vsnprintf(buffer, bufferSize, format, args); + std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); +} + +#define DO_REPORT(type) \ +do \ +{ \ + std::va_list args; \ + va_start(args, format); \ + ReportDiagnostic(type, format, args); \ + va_end(args); \ +} while (0) + +// Reports an error diagnostic and terminates the program. +void AsmFile::RaiseError(const char* format, ...) +{ + DO_REPORT("error"); + std::exit(1); +} + +// Reports a warning diagnostic. +void AsmFile::RaiseWarning(const char* format, ...) +{ + DO_REPORT("warning"); +} diff --git a/tools/preproc/c_file.cpp b/tools/preproc/c_file.cpp index 6213a8a..59426ef 100644 --- a/tools/preproc/c_file.cpp +++ b/tools/preproc/c_file.cpp @@ -1,430 +1,430 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include -#include -#include -#include "preproc.h" -#include "c_file.h" -#include "char_util.h" -#include "utf8.h" -#include "string_parser.h" - -CFile::CFile(std::string filename) : m_filename(filename) -{ - FILE *fp = std::fopen(filename.c_str(), "rb"); - - if (fp == NULL) - FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); - - std::fseek(fp, 0, SEEK_END); - - m_size = std::ftell(fp); - - if (m_size < 0) - FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); - - m_buffer = new char[m_size + 1]; - - std::rewind(fp); - - if (std::fread(m_buffer, m_size, 1, fp) != 1) - FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); - - m_buffer[m_size] = 0; - - std::fclose(fp); - - m_pos = 0; - m_lineNum = 1; -} - -CFile::CFile(CFile&& other) : m_filename(std::move(other.m_filename)) -{ - m_buffer = other.m_buffer; - m_pos = other.m_pos; - m_size = other.m_size; - m_lineNum = other.m_lineNum; - - other.m_buffer = nullptr; -} - -CFile::~CFile() -{ - delete[] m_buffer; -} - -void CFile::Preproc() -{ - char stringChar = 0; - - while (m_pos < m_size) - { - if (stringChar) - { - if (m_buffer[m_pos] == stringChar) - { - std::putchar(stringChar); - m_pos++; - stringChar = 0; - } - else if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == stringChar) - { - std::putchar('\\'); - std::putchar(stringChar); - m_pos += 2; - } - else - { - if (m_buffer[m_pos] == '\n') - m_lineNum++; - std::putchar(m_buffer[m_pos]); - m_pos++; - } - } - else - { - TryConvertString(); - TryConvertIncbin(); - - if (m_pos >= m_size) - break; - - char c = m_buffer[m_pos++]; - - std::putchar(c); - - if (c == '\n') - m_lineNum++; - else if (c == '"') - stringChar = '"'; - else if (c == '\'') - stringChar = '\''; - } - } -} - -bool CFile::ConsumeHorizontalWhitespace() -{ - if (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') - { - m_pos++; - return true; - } - - return false; -} - -bool CFile::ConsumeNewline() -{ - if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n') - { - m_pos += 2; - m_lineNum++; - std::putchar('\n'); - return true; - } - - if (m_buffer[m_pos] == '\n') - { - m_pos++; - m_lineNum++; - std::putchar('\n'); - return true; - } - - return false; -} - -void CFile::SkipWhitespace() -{ - while (ConsumeHorizontalWhitespace() || ConsumeNewline()) - ; -} - -void CFile::TryConvertString() -{ - long oldPos = m_pos; - long oldLineNum = m_lineNum; - bool noTerminator = false; - - if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1]))) - return; - - m_pos++; - - if (m_buffer[m_pos] == '_') - { - noTerminator = true; - m_pos++; - } - - SkipWhitespace(); - - if (m_buffer[m_pos] != '(') - { - m_pos = oldPos; - m_lineNum = oldLineNum; - return; - } - - m_pos++; - - SkipWhitespace(); - - std::printf("{ "); - - while (1) - { - SkipWhitespace(); - - if (m_buffer[m_pos] == '"') - { - unsigned char s[kMaxStringLength]; - int length; - StringParser stringParser(m_buffer, m_size); - - try - { - m_pos += stringParser.ParseString(m_pos, s, length); - } - catch (std::runtime_error& e) - { - RaiseError(e.what()); - } - - for (int i = 0; i < length; i++) - printf("0x%02X, ", s[i]); - } - else if (m_buffer[m_pos] == ')') - { - m_pos++; - break; - } - else - { - if (m_pos >= m_size) - RaiseError("unexpected EOF"); - if (IsAsciiPrintable(m_buffer[m_pos])) - RaiseError("unexpected character '%c'", m_buffer[m_pos]); - else - RaiseError("unexpected character '\\x%02X'", m_buffer[m_pos]); - } - } - - if (noTerminator) - std::printf(" }"); - else - std::printf("0x00 }"); -} - -bool CFile::CheckIdentifier(const std::string& ident) -{ - unsigned int i; - - for (i = 0; i < ident.length() && m_pos + i < (unsigned)m_size; i++) - if (ident[i] != m_buffer[m_pos + i]) - return false; - - return (i == ident.length()); -} - -std::unique_ptr CFile::ReadWholeFile(const std::string& path, int& size) -{ - FILE* fp = std::fopen(path.c_str(), "rb"); - - if (fp == nullptr) - RaiseError("Failed to open \"%s\" for reading.\n", path.c_str()); - - std::fseek(fp, 0, SEEK_END); - - size = std::ftell(fp); - - std::unique_ptr buffer = std::unique_ptr(new unsigned char[size]); - - std::rewind(fp); - - if (std::fread(buffer.get(), size, 1, fp) != 1) - RaiseError("Failed to read \"%s\".\n", path.c_str()); - - std::fclose(fp); - - return buffer; -} - -int ExtractData(const std::unique_ptr& buffer, int offset, int size) -{ - switch (size) - { - case 1: - return buffer[offset]; - case 2: - return (buffer[offset + 1] << 8) - | buffer[offset]; - case 4: - return (buffer[offset + 3] << 24) - | (buffer[offset + 2] << 16) - | (buffer[offset + 1] << 8) - | buffer[offset]; - default: - FATAL_ERROR("Invalid size passed to ExtractData.\n"); - } -} - -void CFile::TryConvertIncbin() -{ - std::string idents[6] = { "INCBIN_S8", "INCBIN_U8", "INCBIN_S16", "INCBIN_U16", "INCBIN_S32", "INCBIN_U32" }; - int incbinType = -1; - - for (int i = 0; i < 6; i++) - { - if (CheckIdentifier(idents[i])) - { - incbinType = i; - break; - } - } - - if (incbinType == -1) - return; - - int size = 1 << (incbinType / 2); - bool isSigned = ((incbinType % 2) == 0); - - long oldPos = m_pos; - long oldLineNum = m_lineNum; - - m_pos += idents[incbinType].length(); - - SkipWhitespace(); - - if (m_buffer[m_pos] != '(') - { - m_pos = oldPos; - m_lineNum = oldLineNum; - return; - } - - m_pos++; - - std::printf("{"); - - while (true) - { - SkipWhitespace(); - - if (m_buffer[m_pos] != '"') - RaiseError("expected double quote"); - - m_pos++; - - int startPos = m_pos; - - while (m_buffer[m_pos] != '"') - { - if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in path string"); - else - RaiseError("unexpected null character in path string"); - } - - if (m_buffer[m_pos] == '\r' || m_buffer[m_pos] == '\n') - RaiseError("unexpected end of line character in path string"); - - if (m_buffer[m_pos] == '\\') - RaiseError("unexpected escape in path string"); - - m_pos++; - } - - std::string path(&m_buffer[startPos], m_pos - startPos); - - m_pos++; - - int fileSize; - std::unique_ptr buffer = ReadWholeFile(path, fileSize); - - if ((fileSize % size) != 0) - RaiseError("Size %d doesn't evenly divide file size %d.\n", size, fileSize); - - int count = fileSize / size; - int offset = 0; - - for (int i = 0; i < count; i++) - { - int data = ExtractData(buffer, offset, size); - offset += size; - - if (isSigned) - std::printf("%d,", data); - else - std::printf("%uu,", data); - } - - SkipWhitespace(); - - if (m_buffer[m_pos] != ',') - break; - - m_pos++; - } - - if (m_buffer[m_pos] != ')') - RaiseError("expected ')'"); - - m_pos++; - - std::printf("}"); -} - -// Reports a diagnostic message. -void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) -{ - const int bufferSize = 1024; - char buffer[bufferSize]; - std::vsnprintf(buffer, bufferSize, format, args); - std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); -} - -#define DO_REPORT(type) \ -do \ -{ \ - std::va_list args; \ - va_start(args, format); \ - ReportDiagnostic(type, format, args); \ - va_end(args); \ -} while (0) - -// Reports an error diagnostic and terminates the program. -void CFile::RaiseError(const char* format, ...) -{ - DO_REPORT("error"); - std::exit(1); -} - -// Reports a warning diagnostic. -void CFile::RaiseWarning(const char* format, ...) -{ - DO_REPORT("warning"); -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include +#include +#include "preproc.h" +#include "c_file.h" +#include "char_util.h" +#include "utf8.h" +#include "string_parser.h" + +CFile::CFile(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; +} + +CFile::CFile(CFile&& other) : m_filename(std::move(other.m_filename)) +{ + m_buffer = other.m_buffer; + m_pos = other.m_pos; + m_size = other.m_size; + m_lineNum = other.m_lineNum; + + other.m_buffer = nullptr; +} + +CFile::~CFile() +{ + delete[] m_buffer; +} + +void CFile::Preproc() +{ + char stringChar = 0; + + while (m_pos < m_size) + { + if (stringChar) + { + if (m_buffer[m_pos] == stringChar) + { + std::putchar(stringChar); + m_pos++; + stringChar = 0; + } + else if (m_buffer[m_pos] == '\\' && m_buffer[m_pos + 1] == stringChar) + { + std::putchar('\\'); + std::putchar(stringChar); + m_pos += 2; + } + else + { + if (m_buffer[m_pos] == '\n') + m_lineNum++; + std::putchar(m_buffer[m_pos]); + m_pos++; + } + } + else + { + TryConvertString(); + TryConvertIncbin(); + + if (m_pos >= m_size) + break; + + char c = m_buffer[m_pos++]; + + std::putchar(c); + + if (c == '\n') + m_lineNum++; + else if (c == '"') + stringChar = '"'; + else if (c == '\'') + stringChar = '\''; + } + } +} + +bool CFile::ConsumeHorizontalWhitespace() +{ + if (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + { + m_pos++; + return true; + } + + return false; +} + +bool CFile::ConsumeNewline() +{ + if (m_buffer[m_pos] == '\r' && m_buffer[m_pos + 1] == '\n') + { + m_pos += 2; + m_lineNum++; + std::putchar('\n'); + return true; + } + + if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + std::putchar('\n'); + return true; + } + + return false; +} + +void CFile::SkipWhitespace() +{ + while (ConsumeHorizontalWhitespace() || ConsumeNewline()) + ; +} + +void CFile::TryConvertString() +{ + long oldPos = m_pos; + long oldLineNum = m_lineNum; + bool noTerminator = false; + + if (m_buffer[m_pos] != '_' || (m_pos > 0 && IsIdentifierChar(m_buffer[m_pos - 1]))) + return; + + m_pos++; + + if (m_buffer[m_pos] == '_') + { + noTerminator = true; + m_pos++; + } + + SkipWhitespace(); + + if (m_buffer[m_pos] != '(') + { + m_pos = oldPos; + m_lineNum = oldLineNum; + return; + } + + m_pos++; + + SkipWhitespace(); + + std::printf("{ "); + + while (1) + { + SkipWhitespace(); + + if (m_buffer[m_pos] == '"') + { + unsigned char s[kMaxStringLength]; + int length; + StringParser stringParser(m_buffer, m_size); + + try + { + m_pos += stringParser.ParseString(m_pos, s, length); + } + catch (std::runtime_error& e) + { + RaiseError(e.what()); + } + + for (int i = 0; i < length; i++) + printf("0x%02X, ", s[i]); + } + else if (m_buffer[m_pos] == ')') + { + m_pos++; + break; + } + else + { + if (m_pos >= m_size) + RaiseError("unexpected EOF"); + if (IsAsciiPrintable(m_buffer[m_pos])) + RaiseError("unexpected character '%c'", m_buffer[m_pos]); + else + RaiseError("unexpected character '\\x%02X'", m_buffer[m_pos]); + } + } + + if (noTerminator) + std::printf(" }"); + else + std::printf("0x00 }"); +} + +bool CFile::CheckIdentifier(const std::string& ident) +{ + unsigned int i; + + for (i = 0; i < ident.length() && m_pos + i < (unsigned)m_size; i++) + if (ident[i] != m_buffer[m_pos + i]) + return false; + + return (i == ident.length()); +} + +std::unique_ptr CFile::ReadWholeFile(const std::string& path, int& size) +{ + FILE* fp = std::fopen(path.c_str(), "rb"); + + if (fp == nullptr) + RaiseError("Failed to open \"%s\" for reading.\n", path.c_str()); + + std::fseek(fp, 0, SEEK_END); + + size = std::ftell(fp); + + std::unique_ptr buffer = std::unique_ptr(new unsigned char[size]); + + std::rewind(fp); + + if (std::fread(buffer.get(), size, 1, fp) != 1) + RaiseError("Failed to read \"%s\".\n", path.c_str()); + + std::fclose(fp); + + return buffer; +} + +int ExtractData(const std::unique_ptr& buffer, int offset, int size) +{ + switch (size) + { + case 1: + return buffer[offset]; + case 2: + return (buffer[offset + 1] << 8) + | buffer[offset]; + case 4: + return (buffer[offset + 3] << 24) + | (buffer[offset + 2] << 16) + | (buffer[offset + 1] << 8) + | buffer[offset]; + default: + FATAL_ERROR("Invalid size passed to ExtractData.\n"); + } +} + +void CFile::TryConvertIncbin() +{ + std::string idents[6] = { "INCBIN_S8", "INCBIN_U8", "INCBIN_S16", "INCBIN_U16", "INCBIN_S32", "INCBIN_U32" }; + int incbinType = -1; + + for (int i = 0; i < 6; i++) + { + if (CheckIdentifier(idents[i])) + { + incbinType = i; + break; + } + } + + if (incbinType == -1) + return; + + int size = 1 << (incbinType / 2); + bool isSigned = ((incbinType % 2) == 0); + + long oldPos = m_pos; + long oldLineNum = m_lineNum; + + m_pos += idents[incbinType].length(); + + SkipWhitespace(); + + if (m_buffer[m_pos] != '(') + { + m_pos = oldPos; + m_lineNum = oldLineNum; + return; + } + + m_pos++; + + std::printf("{"); + + while (true) + { + SkipWhitespace(); + + if (m_buffer[m_pos] != '"') + RaiseError("expected double quote"); + + m_pos++; + + int startPos = m_pos; + + while (m_buffer[m_pos] != '"') + { + if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in path string"); + else + RaiseError("unexpected null character in path string"); + } + + if (m_buffer[m_pos] == '\r' || m_buffer[m_pos] == '\n') + RaiseError("unexpected end of line character in path string"); + + if (m_buffer[m_pos] == '\\') + RaiseError("unexpected escape in path string"); + + m_pos++; + } + + std::string path(&m_buffer[startPos], m_pos - startPos); + + m_pos++; + + int fileSize; + std::unique_ptr buffer = ReadWholeFile(path, fileSize); + + if ((fileSize % size) != 0) + RaiseError("Size %d doesn't evenly divide file size %d.\n", size, fileSize); + + int count = fileSize / size; + int offset = 0; + + for (int i = 0; i < count; i++) + { + int data = ExtractData(buffer, offset, size); + offset += size; + + if (isSigned) + std::printf("%d,", data); + else + std::printf("%uu,", data); + } + + SkipWhitespace(); + + if (m_buffer[m_pos] != ',') + break; + + m_pos++; + } + + if (m_buffer[m_pos] != ')') + RaiseError("expected ')'"); + + m_pos++; + + std::printf("}"); +} + +// Reports a diagnostic message. +void CFile::ReportDiagnostic(const char* type, const char* format, std::va_list args) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + std::vsnprintf(buffer, bufferSize, format, args); + std::fprintf(stderr, "%s:%ld: %s: %s\n", m_filename.c_str(), m_lineNum, type, buffer); +} + +#define DO_REPORT(type) \ +do \ +{ \ + std::va_list args; \ + va_start(args, format); \ + ReportDiagnostic(type, format, args); \ + va_end(args); \ +} while (0) + +// Reports an error diagnostic and terminates the program. +void CFile::RaiseError(const char* format, ...) +{ + DO_REPORT("error"); + std::exit(1); +} + +// Reports a warning diagnostic. +void CFile::RaiseWarning(const char* format, ...) +{ + DO_REPORT("warning"); +} diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp index 55366bc..a7bedfe 100644 --- a/tools/preproc/charmap.cpp +++ b/tools/preproc/charmap.cpp @@ -1,408 +1,408 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include -#include "preproc.h" -#include "charmap.h" -#include "char_util.h" -#include "utf8.h" - -enum LhsType -{ - Char, - Escape, - Constant, - None -}; - -struct Lhs -{ - LhsType type; - std::string name; - std::int32_t code; -}; - -class CharmapReader -{ -public: - CharmapReader(std::string filename); - CharmapReader(const CharmapReader&) = delete; - ~CharmapReader(); - Lhs ReadLhs(); - void ExpectEqualsSign(); - std::string ReadSequence(); - void ExpectEmptyRestOfLine(); - void RaiseError(const char* format, ...); - -private: - char* m_buffer; - long m_pos; - long m_size; - long m_lineNum; - std::string m_filename; - - void RemoveComments(); - std::string ReadConstant(); - void SkipWhitespace(); -}; - -CharmapReader::CharmapReader(std::string filename) : m_filename(filename) -{ - FILE *fp = std::fopen(filename.c_str(), "rb"); - - if (fp == NULL) - FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); - - std::fseek(fp, 0, SEEK_END); - - m_size = std::ftell(fp); - - if (m_size < 0) - FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); - - m_buffer = new char[m_size + 1]; - - std::rewind(fp); - - if (std::fread(m_buffer, m_size, 1, fp) != 1) - FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); - - m_buffer[m_size] = 0; - - std::fclose(fp); - - m_pos = 0; - m_lineNum = 1; - - RemoveComments(); -} - -CharmapReader::~CharmapReader() -{ - delete[] m_buffer; -} - -Lhs CharmapReader::ReadLhs() -{ - Lhs lhs; - - for (;;) - { - SkipWhitespace(); - - if (m_buffer[m_pos] == '\n') - { - m_pos++; - m_lineNum++; - } - else - { - break; - } - } - - if (m_buffer[m_pos] == '\'') - { - m_pos++; - - bool isEscape = (m_buffer[m_pos] == '\\'); - - if (isEscape) - { - m_pos++; - } - - unsigned char c = m_buffer[m_pos]; - - if (c == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in UTF-8 character literal"); - else - RaiseError("unexpected null character in UTF-8 character literal"); - } - - if (IsAscii(c) && !IsAsciiPrintable(c)) - RaiseError("unexpected character U+%X in UTF-8 character literal", c); - - UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); - std::int32_t code = unicodeChar.code; - - if (code == -1) - RaiseError("invalid encoding in UTF-8 character literal"); - - m_pos += unicodeChar.encodingLength; - - if (m_buffer[m_pos] != '\'') - RaiseError("unterminated character literal"); - - m_pos++; - - lhs.code = code; - - if (isEscape) - { - if (code >= 128) - RaiseError("escapes using non-ASCII characters are invalid"); - - switch (code) - { - case '\'': - lhs.type = LhsType::Char; - break; - case '\\': - lhs.type = LhsType::Char; - case '"': - RaiseError("cannot escape double quote"); - break; - default: - lhs.type = LhsType::Escape; - } - } - else - { - if (code == '\'') - RaiseError("empty character literal"); - - lhs.type = LhsType::Char; - } - } - else if (IsIdentifierStartingChar(m_buffer[m_pos])) - { - lhs.type = LhsType::Constant; - lhs.name = ReadConstant(); - } - else if (m_buffer[m_pos] == '\r') - { - RaiseError("only Unix-style LF newlines are supported"); - } - else if (m_buffer[m_pos] == 0) - { - if (m_pos < m_size) - RaiseError("unexpected null character"); - lhs.type = LhsType::None; - } - else - { - RaiseError("junk at start of line"); - } - - return lhs; -} - -void CharmapReader::ExpectEqualsSign() -{ - SkipWhitespace(); - - if (m_buffer[m_pos] != '=') - RaiseError("expected equals sign"); - - m_pos++; -} - -static unsigned int ConvertHexDigit(char c) -{ - unsigned int digit = 0; - - if (c >= '0' && c <= '9') - digit = c - '0'; - else if (c >= 'A' && c <= 'F') - digit = 10 + c - 'A'; - else if (c >= 'a' && c <= 'f') - digit = 10 + c - 'a'; - - return digit; -} - -std::string CharmapReader::ReadSequence() -{ - SkipWhitespace(); - - long startPos = m_pos; - - unsigned int length = 0; - - while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1])) - { - m_pos += 2; - length++; - - if (length > kMaxCharmapSequenceLength) - RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength); - - SkipWhitespace(); - } - - if (IsAsciiHexDigit(m_buffer[m_pos])) - RaiseError("each byte must have 2 hex digits"); - - if (length == 0) - RaiseError("expected byte sequence"); - - std::string sequence; - sequence.reserve(length); - - m_pos = startPos; - - for (unsigned int i = 0; i < length; i++) - { - unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]); - unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]); - unsigned char byte = digit1 * 16 + digit2; - sequence += byte; - - m_pos += 2; - SkipWhitespace(); - } - - return sequence; -} - -void CharmapReader::ExpectEmptyRestOfLine() -{ - SkipWhitespace(); - - if (m_buffer[m_pos] == 0) - { - if (m_pos < m_size) - RaiseError("unexpected null character"); - } - else if (m_buffer[m_pos] == '\n') - { - m_pos++; - m_lineNum++; - } - else if (m_buffer[m_pos] == '\r') - { - RaiseError("only Unix-style LF newlines are supported"); - } - else - { - RaiseError("junk at end of line"); - } -} - -void CharmapReader::RaiseError(const char* format, ...) -{ - const int bufferSize = 1024; - char buffer[bufferSize]; - - std::va_list args; - va_start(args, format); - std::vsnprintf(buffer, bufferSize, format, args); - va_end(args); - - std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer); - - std::exit(1); -} - -void CharmapReader::RemoveComments() -{ - long pos = 0; - bool inString = false; - - for (;;) - { - if (m_buffer[pos] == 0) - return; - - if (inString) - { - if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'') - { - pos += 2; - } - else - { - if (m_buffer[pos] == '\'') - inString = false; - pos++; - } - } - else if (m_buffer[pos] == '@') - { - while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) - m_buffer[pos++] = ' '; - } - else - { - if (m_buffer[pos] == '\'') - inString = true; - pos++; - } - } -} - -std::string CharmapReader::ReadConstant() -{ - long startPos = m_pos; - - while (IsIdentifierChar(m_buffer[m_pos])) - m_pos++; - - return std::string(&m_buffer[startPos], m_pos - startPos); -} - -void CharmapReader::SkipWhitespace() -{ - while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') - m_pos++; -} - -Charmap::Charmap(std::string filename) -{ - CharmapReader reader(filename); - - for (;;) - { - Lhs lhs = reader.ReadLhs(); - - if (lhs.type == LhsType::None) - return; - - reader.ExpectEqualsSign(); - - std::string sequence = reader.ReadSequence(); - - switch (lhs.type) - { - case LhsType::Char: - if (m_chars.find(lhs.code) != m_chars.end()) - reader.RaiseError("redefining char"); - m_chars[lhs.code] = sequence; - break; - case LhsType::Escape: - if (m_escapes[lhs.code].length() != 0) - reader.RaiseError("redefining escape"); - m_escapes[lhs.code] = sequence; - break; - case LhsType::Constant: - if (m_constants.find(lhs.name) != m_constants.end()) - reader.RaiseError("redefining constant"); - m_constants[lhs.name] = sequence; - break; - } - - reader.ExpectEmptyRestOfLine(); - } -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include "preproc.h" +#include "charmap.h" +#include "char_util.h" +#include "utf8.h" + +enum LhsType +{ + Char, + Escape, + Constant, + None +}; + +struct Lhs +{ + LhsType type; + std::string name; + std::int32_t code; +}; + +class CharmapReader +{ +public: + CharmapReader(std::string filename); + CharmapReader(const CharmapReader&) = delete; + ~CharmapReader(); + Lhs ReadLhs(); + void ExpectEqualsSign(); + std::string ReadSequence(); + void ExpectEmptyRestOfLine(); + void RaiseError(const char* format, ...); + +private: + char* m_buffer; + long m_pos; + long m_size; + long m_lineNum; + std::string m_filename; + + void RemoveComments(); + std::string ReadConstant(); + void SkipWhitespace(); +}; + +CharmapReader::CharmapReader(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; + + RemoveComments(); +} + +CharmapReader::~CharmapReader() +{ + delete[] m_buffer; +} + +Lhs CharmapReader::ReadLhs() +{ + Lhs lhs; + + for (;;) + { + SkipWhitespace(); + + if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else + { + break; + } + } + + if (m_buffer[m_pos] == '\'') + { + m_pos++; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 character literal"); + else + RaiseError("unexpected null character in UTF-8 character literal"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 character literal", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 character literal"); + + m_pos += unicodeChar.encodingLength; + + if (m_buffer[m_pos] != '\'') + RaiseError("unterminated character literal"); + + m_pos++; + + lhs.code = code; + + if (isEscape) + { + if (code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + switch (code) + { + case '\'': + lhs.type = LhsType::Char; + break; + case '\\': + lhs.type = LhsType::Char; + case '"': + RaiseError("cannot escape double quote"); + break; + default: + lhs.type = LhsType::Escape; + } + } + else + { + if (code == '\'') + RaiseError("empty character literal"); + + lhs.type = LhsType::Char; + } + } + else if (IsIdentifierStartingChar(m_buffer[m_pos])) + { + lhs.type = LhsType::Constant; + lhs.name = ReadConstant(); + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + lhs.type = LhsType::None; + } + else + { + RaiseError("junk at start of line"); + } + + return lhs; +} + +void CharmapReader::ExpectEqualsSign() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '=') + RaiseError("expected equals sign"); + + m_pos++; +} + +static unsigned int ConvertHexDigit(char c) +{ + unsigned int digit = 0; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + + return digit; +} + +std::string CharmapReader::ReadSequence() +{ + SkipWhitespace(); + + long startPos = m_pos; + + unsigned int length = 0; + + while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1])) + { + m_pos += 2; + length++; + + if (length > kMaxCharmapSequenceLength) + RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength); + + SkipWhitespace(); + } + + if (IsAsciiHexDigit(m_buffer[m_pos])) + RaiseError("each byte must have 2 hex digits"); + + if (length == 0) + RaiseError("expected byte sequence"); + + std::string sequence; + sequence.reserve(length); + + m_pos = startPos; + + for (unsigned int i = 0; i < length; i++) + { + unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]); + unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]); + unsigned char byte = digit1 * 16 + digit2; + sequence += byte; + + m_pos += 2; + SkipWhitespace(); + } + + return sequence; +} + +void CharmapReader::ExpectEmptyRestOfLine() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + } + else if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else + { + RaiseError("junk at end of line"); + } +} + +void CharmapReader::RaiseError(const char* format, ...) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer); + + std::exit(1); +} + +void CharmapReader::RemoveComments() +{ + long pos = 0; + bool inString = false; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (inString) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'') + { + pos += 2; + } + else + { + if (m_buffer[pos] == '\'') + inString = false; + pos++; + } + } + else if (m_buffer[pos] == '@') + { + while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) + m_buffer[pos++] = ' '; + } + else + { + if (m_buffer[pos] == '\'') + inString = true; + pos++; + } + } +} + +std::string CharmapReader::ReadConstant() +{ + long startPos = m_pos; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + return std::string(&m_buffer[startPos], m_pos - startPos); +} + +void CharmapReader::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} + +Charmap::Charmap(std::string filename) +{ + CharmapReader reader(filename); + + for (;;) + { + Lhs lhs = reader.ReadLhs(); + + if (lhs.type == LhsType::None) + return; + + reader.ExpectEqualsSign(); + + std::string sequence = reader.ReadSequence(); + + switch (lhs.type) + { + case LhsType::Char: + if (m_chars.find(lhs.code) != m_chars.end()) + reader.RaiseError("redefining char"); + m_chars[lhs.code] = sequence; + break; + case LhsType::Escape: + if (m_escapes[lhs.code].length() != 0) + reader.RaiseError("redefining escape"); + m_escapes[lhs.code] = sequence; + break; + case LhsType::Constant: + if (m_constants.find(lhs.name) != m_constants.end()) + reader.RaiseError("redefining constant"); + m_constants[lhs.name] = sequence; + break; + } + + reader.ExpectEmptyRestOfLine(); + } +} diff --git a/tools/preproc/preproc.cpp b/tools/preproc/preproc.cpp index 41856f7..c9c6042 100644 --- a/tools/preproc/preproc.cpp +++ b/tools/preproc/preproc.cpp @@ -1,156 +1,156 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include "preproc.h" -#include "asm_file.h" -#include "c_file.h" -#include "charmap.h" - -Charmap* g_charmap; - -void PrintAsmBytes(unsigned char *s, int length) -{ - if (length > 0) - { - std::printf("\t.byte "); - for (int i = 0; i < length; i++) - { - std::printf("0x%02X", s[i]); - - if (i < length - 1) - std::printf(", "); - } - std::putchar('\n'); - } -} - -void PreprocAsmFile(std::string filename) -{ - std::stack stack; - - stack.push(AsmFile(filename)); - - for (;;) - { - while (stack.top().IsAtEnd()) - { - stack.pop(); - - if (stack.empty()) - return; - else - stack.top().OutputLocation(); - } - - Directive directive = stack.top().GetDirective(); - - switch (directive) - { - case Directive::Include: - stack.push(AsmFile(stack.top().ReadPath())); - stack.top().OutputLocation(); - break; - case Directive::String: - { - unsigned char s[kMaxStringLength]; - int length = stack.top().ReadString(s); - PrintAsmBytes(s, length); - break; - } - case Directive::Braille: - { - unsigned char s[kMaxStringLength]; - int length = stack.top().ReadBraille(s); - PrintAsmBytes(s, length); - break; - } - case Directive::Unknown: - { - std::string globalLabel = stack.top().GetGlobalLabel(); - - if (globalLabel.length() != 0) - { - const char *s = globalLabel.c_str(); - std::printf("%s: ; .global %s\n", s, s); - } - else - { - stack.top().OutputLine(); - } - - break; - } - } - } -} - -void PreprocCFile(std::string filename) -{ - CFile cFile(filename); - cFile.Preproc(); -} - -char* GetFileExtension(char* filename) -{ - char* extension = filename; - - while (*extension != 0) - extension++; - - while (extension > filename && *extension != '.') - extension--; - - if (extension == filename) - return nullptr; - - extension++; - - if (*extension == 0) - return nullptr; - - return extension; -} - -int main(int argc, char **argv) -{ - if (argc != 3) - { - std::fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE", argv[0]); - return 1; - } - - g_charmap = new Charmap(argv[2]); - - char* extension = GetFileExtension(argv[1]); - - if (!extension) - FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]); - - if ((extension[0] == 's') && extension[1] == 0) - PreprocAsmFile(argv[1]); - else if ((extension[0] == 'c' || extension[0] == 'i') && extension[1] == 0) - PreprocCFile(argv[1]); - else - FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension); - - return 0; -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include "preproc.h" +#include "asm_file.h" +#include "c_file.h" +#include "charmap.h" + +Charmap* g_charmap; + +void PrintAsmBytes(unsigned char *s, int length) +{ + if (length > 0) + { + std::printf("\t.byte "); + for (int i = 0; i < length; i++) + { + std::printf("0x%02X", s[i]); + + if (i < length - 1) + std::printf(", "); + } + std::putchar('\n'); + } +} + +void PreprocAsmFile(std::string filename) +{ + std::stack stack; + + stack.push(AsmFile(filename)); + + for (;;) + { + while (stack.top().IsAtEnd()) + { + stack.pop(); + + if (stack.empty()) + return; + else + stack.top().OutputLocation(); + } + + Directive directive = stack.top().GetDirective(); + + switch (directive) + { + case Directive::Include: + stack.push(AsmFile(stack.top().ReadPath())); + stack.top().OutputLocation(); + break; + case Directive::String: + { + unsigned char s[kMaxStringLength]; + int length = stack.top().ReadString(s); + PrintAsmBytes(s, length); + break; + } + case Directive::Braille: + { + unsigned char s[kMaxStringLength]; + int length = stack.top().ReadBraille(s); + PrintAsmBytes(s, length); + break; + } + case Directive::Unknown: + { + std::string globalLabel = stack.top().GetGlobalLabel(); + + if (globalLabel.length() != 0) + { + const char *s = globalLabel.c_str(); + std::printf("%s: ; .global %s\n", s, s); + } + else + { + stack.top().OutputLine(); + } + + break; + } + } + } +} + +void PreprocCFile(std::string filename) +{ + CFile cFile(filename); + cFile.Preproc(); +} + +char* GetFileExtension(char* filename) +{ + char* extension = filename; + + while (*extension != 0) + extension++; + + while (extension > filename && *extension != '.') + extension--; + + if (extension == filename) + return nullptr; + + extension++; + + if (*extension == 0) + return nullptr; + + return extension; +} + +int main(int argc, char **argv) +{ + if (argc != 3) + { + std::fprintf(stderr, "Usage: %s SRC_FILE CHARMAP_FILE", argv[0]); + return 1; + } + + g_charmap = new Charmap(argv[2]); + + char* extension = GetFileExtension(argv[1]); + + if (!extension) + FATAL_ERROR("\"%s\" has no file extension.\n", argv[1]); + + if ((extension[0] == 's') && extension[1] == 0) + PreprocAsmFile(argv[1]); + else if ((extension[0] == 'c' || extension[0] == 'i') && extension[1] == 0) + PreprocCFile(argv[1]); + else + FATAL_ERROR("\"%s\" has an unknown file extension of \"%s\".\n", argv[1], extension); + + return 0; +} diff --git a/tools/preproc/string_parser.cpp b/tools/preproc/string_parser.cpp index b0a4875..dd5196a 100644 --- a/tools/preproc/string_parser.cpp +++ b/tools/preproc/string_parser.cpp @@ -1,355 +1,355 @@ -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include -#include -#include "preproc.h" -#include "string_parser.h" -#include "char_util.h" -#include "utf8.h" - -// Reads a charmap char or escape sequence. -std::string StringParser::ReadCharOrEscape() -{ - std::string sequence; - - bool isEscape = (m_buffer[m_pos] == '\\'); - - if (isEscape) - { - m_pos++; - - if (m_buffer[m_pos] == '"') - { - sequence = g_charmap->Char('"'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for double quote"); - - return sequence; - } - else if (m_buffer[m_pos] == '\\') - { - sequence = g_charmap->Char('\\'); - - if (sequence.length() == 0) - RaiseError("no mapping exists for backslash"); - - return sequence; - } - } - - unsigned char c = m_buffer[m_pos]; - - if (c == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF in UTF-8 string"); - else - RaiseError("unexpected null character in UTF-8 string"); - } - - if (IsAscii(c) && !IsAsciiPrintable(c)) - RaiseError("unexpected character U+%X in UTF-8 string", c); - - UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); - m_pos += unicodeChar.encodingLength; - std::int32_t code = unicodeChar.code; - - if (code == -1) - RaiseError("invalid encoding in UTF-8 string"); - - if (isEscape && code >= 128) - RaiseError("escapes using non-ASCII characters are invalid"); - - sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); - - if (sequence.length() == 0) - { - if (isEscape) - RaiseError("unknown escape '\\%c'", code); - else - RaiseError("unknown character U+%X", code); - } - - return sequence; -} - -// Reads a charmap constant, i.e. "{FOO}". -std::string StringParser::ReadBracketedConstants() -{ - std::string totalSequence; - - m_pos++; // Assume we're on the left curly bracket. - - while (m_buffer[m_pos] != '}') - { - SkipWhitespace(); - - if (IsIdentifierStartingChar(m_buffer[m_pos])) - { - long startPos = m_pos; - - m_pos++; - - while (IsIdentifierChar(m_buffer[m_pos])) - m_pos++; - - std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); - - if (sequence.length() == 0) - { - m_buffer[m_pos] = 0; - RaiseError("unknown constant '%s'", &m_buffer[startPos]); - } - - totalSequence += sequence; - } - else if (IsAsciiDigit(m_buffer[m_pos])) - { - Integer integer = ReadInteger(); - - switch (integer.size) - { - case 1: - totalSequence += (unsigned char)integer.value; - break; - case 2: - totalSequence += (unsigned char)integer.value; - totalSequence += (unsigned char)(integer.value >> 8); - break; - case 4: - totalSequence += (unsigned char)integer.value; - totalSequence += (unsigned char)(integer.value >> 8); - totalSequence += (unsigned char)(integer.value >> 16); - totalSequence += (unsigned char)(integer.value >> 24); - break; - } - } - else if (m_buffer[m_pos] == 0) - { - if (m_pos >= m_size) - RaiseError("unexpected EOF after left curly bracket"); - else - RaiseError("unexpected null character within curly brackets"); - } - else - { - if (IsAsciiPrintable(m_buffer[m_pos])) - RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); - else - RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); - } - } - - m_pos++; // Go past the right curly bracket. - - return totalSequence; -} - -// Reads a charmap string. -int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength) -{ - m_pos = srcPos; - - if (m_buffer[m_pos] != '"') - RaiseError("expected UTF-8 string literal"); - - long start = m_pos; - - m_pos++; - - destLength = 0; - - while (m_buffer[m_pos] != '"') - { - std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); - - for (const char& c : sequence) - { - if (destLength == kMaxStringLength) - RaiseError("mapped string longer than %d bytes", kMaxStringLength); - - dest[destLength++] = c; - } - } - - m_pos++; // Go past the right quote. - - return m_pos - start; -} - -void StringParser::RaiseError(const char* format, ...) -{ - const int bufferSize = 1024; - char buffer[bufferSize]; - - std::va_list args; - va_start(args, format); - std::vsnprintf(buffer, bufferSize, format, args); - va_end(args); - - throw std::runtime_error(buffer); -} - -// Converts digit character to numerical value. -static int ConvertDigit(char c, int radix) -{ - int digit; - - if (c >= '0' && c <= '9') - digit = c - '0'; - else if (c >= 'A' && c <= 'F') - digit = 10 + c - 'A'; - else if (c >= 'a' && c <= 'f') - digit = 10 + c - 'a'; - else - return -1; - - return (digit < radix) ? digit : -1; -} - -void StringParser::SkipRestOfInteger(int radix) -{ - while (ConvertDigit(m_buffer[m_pos], radix) != -1) - m_pos++; -} - -StringParser::Integer StringParser::ReadDecimal() -{ - const int radix = 10; - std::uint64_t n = 0; - int digit; - std::uint64_t max = UINT32_MAX; - long startPos = m_pos; - - while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) - { - n = n * radix + digit; - - if (n >= max) - { - SkipRestOfInteger(radix); - - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); - } - - m_pos++; - } - - int size; - - if (m_buffer[m_pos] == 'H') - { - if (n >= 0x10000) - { - RaiseError("%lu is too large to be a halfword", (unsigned long)n); - } - - size = 2; - m_pos++; - } - else if (m_buffer[m_pos] == 'W') - { - size = 4; - m_pos++; - } - else - { - if (n >= 0x10000) - size = 4; - else if (n >= 0x100) - size = 2; - else - size = 1; - } - - return{ static_cast(n), size }; -} - -StringParser::Integer StringParser::ReadHex() -{ - const int radix = 16; - std::uint64_t n = 0; - int digit; - std::uint64_t max = UINT32_MAX; - long startPos = m_pos; - - while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) - { - n = n * radix + digit; - - if (n >= max) - { - SkipRestOfInteger(radix); - - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); - } - - m_pos++; - } - - int length = m_pos - startPos; - int size = 0; - - switch (length) - { - case 2: - size = 1; - break; - case 4: - size = 2; - break; - case 8: - size = 4; - break; - default: - { - std::string intLiteral(m_buffer + startPos, m_pos - startPos); - RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str()); - } - } - - return{ static_cast(n), size }; -} - -StringParser::Integer StringParser::ReadInteger() -{ - if (!IsAsciiDigit(m_buffer[m_pos])) - RaiseError("expected integer"); - - if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') - { - m_pos += 2; - return ReadHex(); - } - - return ReadDecimal(); -} - -// Skips tabs and spaces. -void StringParser::SkipWhitespace() -{ - while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') - m_pos++; -} +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include +#include +#include "preproc.h" +#include "string_parser.h" +#include "char_util.h" +#include "utf8.h" + +// Reads a charmap char or escape sequence. +std::string StringParser::ReadCharOrEscape() +{ + std::string sequence; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + + if (m_buffer[m_pos] == '"') + { + sequence = g_charmap->Char('"'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for double quote"); + + return sequence; + } + else if (m_buffer[m_pos] == '\\') + { + sequence = g_charmap->Char('\\'); + + if (sequence.length() == 0) + RaiseError("no mapping exists for backslash"); + + return sequence; + } + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 string"); + else + RaiseError("unexpected null character in UTF-8 string"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 string", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + m_pos += unicodeChar.encodingLength; + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 string"); + + if (isEscape && code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + sequence = isEscape ? g_charmap->Escape(code) : g_charmap->Char(code); + + if (sequence.length() == 0) + { + if (isEscape) + RaiseError("unknown escape '\\%c'", code); + else + RaiseError("unknown character U+%X", code); + } + + return sequence; +} + +// Reads a charmap constant, i.e. "{FOO}". +std::string StringParser::ReadBracketedConstants() +{ + std::string totalSequence; + + m_pos++; // Assume we're on the left curly bracket. + + while (m_buffer[m_pos] != '}') + { + SkipWhitespace(); + + if (IsIdentifierStartingChar(m_buffer[m_pos])) + { + long startPos = m_pos; + + m_pos++; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + std::string sequence = g_charmap->Constant(std::string(&m_buffer[startPos], m_pos - startPos)); + + if (sequence.length() == 0) + { + m_buffer[m_pos] = 0; + RaiseError("unknown constant '%s'", &m_buffer[startPos]); + } + + totalSequence += sequence; + } + else if (IsAsciiDigit(m_buffer[m_pos])) + { + Integer integer = ReadInteger(); + + switch (integer.size) + { + case 1: + totalSequence += (unsigned char)integer.value; + break; + case 2: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + break; + case 4: + totalSequence += (unsigned char)integer.value; + totalSequence += (unsigned char)(integer.value >> 8); + totalSequence += (unsigned char)(integer.value >> 16); + totalSequence += (unsigned char)(integer.value >> 24); + break; + } + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF after left curly bracket"); + else + RaiseError("unexpected null character within curly brackets"); + } + else + { + if (IsAsciiPrintable(m_buffer[m_pos])) + RaiseError("unexpected character '%c' within curly brackets", m_buffer[m_pos]); + else + RaiseError("unexpected character '\\x%02X' within curly brackets", m_buffer[m_pos]); + } + } + + m_pos++; // Go past the right curly bracket. + + return totalSequence; +} + +// Reads a charmap string. +int StringParser::ParseString(long srcPos, unsigned char* dest, int& destLength) +{ + m_pos = srcPos; + + if (m_buffer[m_pos] != '"') + RaiseError("expected UTF-8 string literal"); + + long start = m_pos; + + m_pos++; + + destLength = 0; + + while (m_buffer[m_pos] != '"') + { + std::string sequence = (m_buffer[m_pos] == '{') ? ReadBracketedConstants() : ReadCharOrEscape(); + + for (const char& c : sequence) + { + if (destLength == kMaxStringLength) + RaiseError("mapped string longer than %d bytes", kMaxStringLength); + + dest[destLength++] = c; + } + } + + m_pos++; // Go past the right quote. + + return m_pos - start; +} + +void StringParser::RaiseError(const char* format, ...) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + throw std::runtime_error(buffer); +} + +// Converts digit character to numerical value. +static int ConvertDigit(char c, int radix) +{ + int digit; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + else + return -1; + + return (digit < radix) ? digit : -1; +} + +void StringParser::SkipRestOfInteger(int radix) +{ + while (ConvertDigit(m_buffer[m_pos], radix) != -1) + m_pos++; +} + +StringParser::Integer StringParser::ReadDecimal() +{ + const int radix = 10; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int size; + + if (m_buffer[m_pos] == 'H') + { + if (n >= 0x10000) + { + RaiseError("%lu is too large to be a halfword", (unsigned long)n); + } + + size = 2; + m_pos++; + } + else if (m_buffer[m_pos] == 'W') + { + size = 4; + m_pos++; + } + else + { + if (n >= 0x10000) + size = 4; + else if (n >= 0x100) + size = 2; + else + size = 1; + } + + return{ static_cast(n), size }; +} + +StringParser::Integer StringParser::ReadHex() +{ + const int radix = 16; + std::uint64_t n = 0; + int digit; + std::uint64_t max = UINT32_MAX; + long startPos = m_pos; + + while ((digit = ConvertDigit(m_buffer[m_pos], radix)) != -1) + { + n = n * radix + digit; + + if (n >= max) + { + SkipRestOfInteger(radix); + + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("integer literal \"%s\" is too large", intLiteral.c_str()); + } + + m_pos++; + } + + int length = m_pos - startPos; + int size = 0; + + switch (length) + { + case 2: + size = 1; + break; + case 4: + size = 2; + break; + case 8: + size = 4; + break; + default: + { + std::string intLiteral(m_buffer + startPos, m_pos - startPos); + RaiseError("hex integer literal \"0x%s\" doesn't have length of 2, 4, or 8 digits", intLiteral.c_str()); + } + } + + return{ static_cast(n), size }; +} + +StringParser::Integer StringParser::ReadInteger() +{ + if (!IsAsciiDigit(m_buffer[m_pos])) + RaiseError("expected integer"); + + if (m_buffer[m_pos] == '0' && m_buffer[m_pos + 1] == 'x') + { + m_pos += 2; + return ReadHex(); + } + + return ReadDecimal(); +} + +// Skips tabs and spaces. +void StringParser::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} diff --git a/tools/preproc/utf8.cpp b/tools/preproc/utf8.cpp index 8ed10a1..7facfd4 100644 --- a/tools/preproc/utf8.cpp +++ b/tools/preproc/utf8.cpp @@ -1,92 +1,92 @@ -// Copyright (c) 2008-2009 Bjoern Hoehrmann -// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. -// -// Copyright(c) 2016 YamaArashi -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include -#include "utf8.h" - -static const unsigned char s_byteTypeTable[] = -{ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df - 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef - 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff -}; - -const unsigned char s0 = 0 * 12; -const unsigned char s1 = 1 * 12; -const unsigned char s2 = 2 * 12; -const unsigned char s3 = 3 * 12; -const unsigned char s4 = 4 * 12; -const unsigned char s5 = 5 * 12; -const unsigned char s6 = 6 * 12; -const unsigned char s7 = 7 * 12; -const unsigned char s8 = 8 * 12; - -static const unsigned char s_transitionTable[] = -{ - s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0 - s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1 - s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2 - s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3 - s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4 - s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5 - s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6 - s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7 - s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8 -}; - -// Decodes UTF-8 encoded Unicode code point at "s". -UnicodeChar DecodeUtf8(const char* s) -{ - UnicodeChar unicodeChar; - int state = s0; - auto start = s; - - do - { - unsigned char byte = *s++; - int type = s_byteTypeTable[byte]; - - if (state == s0) - unicodeChar.code = (0xFF >> type) & byte; - else - unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F); - - state = s_transitionTable[state + type]; - - if (state == s1) - { - unicodeChar.code = -1; - return unicodeChar; - } - } while (state != s0); - - unicodeChar.encodingLength = s - start; - - return unicodeChar; -} +// Copyright (c) 2008-2009 Bjoern Hoehrmann +// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. +// +// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include "utf8.h" + +static const unsigned char s_byteTypeTable[] = +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff +}; + +const unsigned char s0 = 0 * 12; +const unsigned char s1 = 1 * 12; +const unsigned char s2 = 2 * 12; +const unsigned char s3 = 3 * 12; +const unsigned char s4 = 4 * 12; +const unsigned char s5 = 5 * 12; +const unsigned char s6 = 6 * 12; +const unsigned char s7 = 7 * 12; +const unsigned char s8 = 8 * 12; + +static const unsigned char s_transitionTable[] = +{ + s0,s1,s2,s3,s5,s8,s7,s1,s1,s1,s4,s6, // s0 + s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s1 + s1,s0,s1,s1,s1,s1,s1,s0,s1,s0,s1,s1, // s2 + s1,s2,s1,s1,s1,s1,s1,s2,s1,s2,s1,s1, // s3 + s1,s1,s1,s1,s1,s1,s1,s2,s1,s1,s1,s1, // s4 + s1,s2,s1,s1,s1,s1,s1,s1,s1,s2,s1,s1, // s5 + s1,s1,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s6 + s1,s3,s1,s1,s1,s1,s1,s3,s1,s3,s1,s1, // s7 + s1,s3,s1,s1,s1,s1,s1,s1,s1,s1,s1,s1, // s8 +}; + +// Decodes UTF-8 encoded Unicode code point at "s". +UnicodeChar DecodeUtf8(const char* s) +{ + UnicodeChar unicodeChar; + int state = s0; + auto start = s; + + do + { + unsigned char byte = *s++; + int type = s_byteTypeTable[byte]; + + if (state == s0) + unicodeChar.code = (0xFF >> type) & byte; + else + unicodeChar.code = (unicodeChar.code << 6) | (byte & 0x3F); + + state = s_transitionTable[state + type]; + + if (state == s1) + { + unicodeChar.code = -1; + return unicodeChar; + } + } while (state != s0); + + unicodeChar.encodingLength = s - start; + + return unicodeChar; +} -- cgit v1.2.3