diff options
Diffstat (limited to 'tools/preproc/charmap.cpp')
-rw-r--r-- | tools/preproc/charmap.cpp | 816 |
1 files changed, 408 insertions, 408 deletions
diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp index 55366bc..a7bedfe 100644 --- a/tools/preproc/charmap.cpp +++ b/tools/preproc/charmap.cpp @@ -1,408 +1,408 @@ -// Copyright(c) 2016 YamaArashi
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-#include <cstdio>
-#include <cstdint>
-#include <cstdarg>
-#include "preproc.h"
-#include "charmap.h"
-#include "char_util.h"
-#include "utf8.h"
-
-enum LhsType
-{
- Char,
- Escape,
- Constant,
- None
-};
-
-struct Lhs
-{
- LhsType type;
- std::string name;
- std::int32_t code;
-};
-
-class CharmapReader
-{
-public:
- CharmapReader(std::string filename);
- CharmapReader(const CharmapReader&) = delete;
- ~CharmapReader();
- Lhs ReadLhs();
- void ExpectEqualsSign();
- std::string ReadSequence();
- void ExpectEmptyRestOfLine();
- void RaiseError(const char* format, ...);
-
-private:
- char* m_buffer;
- long m_pos;
- long m_size;
- long m_lineNum;
- std::string m_filename;
-
- void RemoveComments();
- std::string ReadConstant();
- void SkipWhitespace();
-};
-
-CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
-{
- FILE *fp = std::fopen(filename.c_str(), "rb");
-
- if (fp == NULL)
- FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());
-
- std::fseek(fp, 0, SEEK_END);
-
- m_size = std::ftell(fp);
-
- if (m_size < 0)
- FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());
-
- m_buffer = new char[m_size + 1];
-
- std::rewind(fp);
-
- if (std::fread(m_buffer, m_size, 1, fp) != 1)
- FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());
-
- m_buffer[m_size] = 0;
-
- std::fclose(fp);
-
- m_pos = 0;
- m_lineNum = 1;
-
- RemoveComments();
-}
-
-CharmapReader::~CharmapReader()
-{
- delete[] m_buffer;
-}
-
-Lhs CharmapReader::ReadLhs()
-{
- Lhs lhs;
-
- for (;;)
- {
- SkipWhitespace();
-
- if (m_buffer[m_pos] == '\n')
- {
- m_pos++;
- m_lineNum++;
- }
- else
- {
- break;
- }
- }
-
- if (m_buffer[m_pos] == '\'')
- {
- m_pos++;
-
- bool isEscape = (m_buffer[m_pos] == '\\');
-
- if (isEscape)
- {
- m_pos++;
- }
-
- unsigned char c = m_buffer[m_pos];
-
- if (c == 0)
- {
- if (m_pos >= m_size)
- RaiseError("unexpected EOF in UTF-8 character literal");
- else
- RaiseError("unexpected null character in UTF-8 character literal");
- }
-
- if (IsAscii(c) && !IsAsciiPrintable(c))
- RaiseError("unexpected character U+%X in UTF-8 character literal", c);
-
- UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
- std::int32_t code = unicodeChar.code;
-
- if (code == -1)
- RaiseError("invalid encoding in UTF-8 character literal");
-
- m_pos += unicodeChar.encodingLength;
-
- if (m_buffer[m_pos] != '\'')
- RaiseError("unterminated character literal");
-
- m_pos++;
-
- lhs.code = code;
-
- if (isEscape)
- {
- if (code >= 128)
- RaiseError("escapes using non-ASCII characters are invalid");
-
- switch (code)
- {
- case '\'':
- lhs.type = LhsType::Char;
- break;
- case '\\':
- lhs.type = LhsType::Char;
- case '"':
- RaiseError("cannot escape double quote");
- break;
- default:
- lhs.type = LhsType::Escape;
- }
- }
- else
- {
- if (code == '\'')
- RaiseError("empty character literal");
-
- lhs.type = LhsType::Char;
- }
- }
- else if (IsIdentifierStartingChar(m_buffer[m_pos]))
- {
- lhs.type = LhsType::Constant;
- lhs.name = ReadConstant();
- }
- else if (m_buffer[m_pos] == '\r')
- {
- RaiseError("only Unix-style LF newlines are supported");
- }
- else if (m_buffer[m_pos] == 0)
- {
- if (m_pos < m_size)
- RaiseError("unexpected null character");
- lhs.type = LhsType::None;
- }
- else
- {
- RaiseError("junk at start of line");
- }
-
- return lhs;
-}
-
-void CharmapReader::ExpectEqualsSign()
-{
- SkipWhitespace();
-
- if (m_buffer[m_pos] != '=')
- RaiseError("expected equals sign");
-
- m_pos++;
-}
-
-static unsigned int ConvertHexDigit(char c)
-{
- unsigned int digit = 0;
-
- if (c >= '0' && c <= '9')
- digit = c - '0';
- else if (c >= 'A' && c <= 'F')
- digit = 10 + c - 'A';
- else if (c >= 'a' && c <= 'f')
- digit = 10 + c - 'a';
-
- return digit;
-}
-
-std::string CharmapReader::ReadSequence()
-{
- SkipWhitespace();
-
- long startPos = m_pos;
-
- unsigned int length = 0;
-
- while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1]))
- {
- m_pos += 2;
- length++;
-
- if (length > kMaxCharmapSequenceLength)
- RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);
-
- SkipWhitespace();
- }
-
- if (IsAsciiHexDigit(m_buffer[m_pos]))
- RaiseError("each byte must have 2 hex digits");
-
- if (length == 0)
- RaiseError("expected byte sequence");
-
- std::string sequence;
- sequence.reserve(length);
-
- m_pos = startPos;
-
- for (unsigned int i = 0; i < length; i++)
- {
- unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]);
- unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]);
- unsigned char byte = digit1 * 16 + digit2;
- sequence += byte;
-
- m_pos += 2;
- SkipWhitespace();
- }
-
- return sequence;
-}
-
-void CharmapReader::ExpectEmptyRestOfLine()
-{
- SkipWhitespace();
-
- if (m_buffer[m_pos] == 0)
- {
- if (m_pos < m_size)
- RaiseError("unexpected null character");
- }
- else if (m_buffer[m_pos] == '\n')
- {
- m_pos++;
- m_lineNum++;
- }
- else if (m_buffer[m_pos] == '\r')
- {
- RaiseError("only Unix-style LF newlines are supported");
- }
- else
- {
- RaiseError("junk at end of line");
- }
-}
-
-void CharmapReader::RaiseError(const char* format, ...)
-{
- const int bufferSize = 1024;
- char buffer[bufferSize];
-
- std::va_list args;
- va_start(args, format);
- std::vsnprintf(buffer, bufferSize, format, args);
- va_end(args);
-
- std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer);
-
- std::exit(1);
-}
-
-void CharmapReader::RemoveComments()
-{
- long pos = 0;
- bool inString = false;
-
- for (;;)
- {
- if (m_buffer[pos] == 0)
- return;
-
- if (inString)
- {
- if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'')
- {
- pos += 2;
- }
- else
- {
- if (m_buffer[pos] == '\'')
- inString = false;
- pos++;
- }
- }
- else if (m_buffer[pos] == '@')
- {
- while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
- m_buffer[pos++] = ' ';
- }
- else
- {
- if (m_buffer[pos] == '\'')
- inString = true;
- pos++;
- }
- }
-}
-
-std::string CharmapReader::ReadConstant()
-{
- long startPos = m_pos;
-
- while (IsIdentifierChar(m_buffer[m_pos]))
- m_pos++;
-
- return std::string(&m_buffer[startPos], m_pos - startPos);
-}
-
-void CharmapReader::SkipWhitespace()
-{
- while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ')
- m_pos++;
-}
-
-Charmap::Charmap(std::string filename)
-{
- CharmapReader reader(filename);
-
- for (;;)
- {
- Lhs lhs = reader.ReadLhs();
-
- if (lhs.type == LhsType::None)
- return;
-
- reader.ExpectEqualsSign();
-
- std::string sequence = reader.ReadSequence();
-
- switch (lhs.type)
- {
- case LhsType::Char:
- if (m_chars.find(lhs.code) != m_chars.end())
- reader.RaiseError("redefining char");
- m_chars[lhs.code] = sequence;
- break;
- case LhsType::Escape:
- if (m_escapes[lhs.code].length() != 0)
- reader.RaiseError("redefining escape");
- m_escapes[lhs.code] = sequence;
- break;
- case LhsType::Constant:
- if (m_constants.find(lhs.name) != m_constants.end())
- reader.RaiseError("redefining constant");
- m_constants[lhs.name] = sequence;
- break;
- }
-
- reader.ExpectEmptyRestOfLine();
- }
-}
+// Copyright(c) 2016 YamaArashi +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <cstdio> +#include <cstdint> +#include <cstdarg> +#include "preproc.h" +#include "charmap.h" +#include "char_util.h" +#include "utf8.h" + +enum LhsType +{ + Char, + Escape, + Constant, + None +}; + +struct Lhs +{ + LhsType type; + std::string name; + std::int32_t code; +}; + +class CharmapReader +{ +public: + CharmapReader(std::string filename); + CharmapReader(const CharmapReader&) = delete; + ~CharmapReader(); + Lhs ReadLhs(); + void ExpectEqualsSign(); + std::string ReadSequence(); + void ExpectEmptyRestOfLine(); + void RaiseError(const char* format, ...); + +private: + char* m_buffer; + long m_pos; + long m_size; + long m_lineNum; + std::string m_filename; + + void RemoveComments(); + std::string ReadConstant(); + void SkipWhitespace(); +}; + +CharmapReader::CharmapReader(std::string filename) : m_filename(filename) +{ + FILE *fp = std::fopen(filename.c_str(), "rb"); + + if (fp == NULL) + FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str()); + + std::fseek(fp, 0, SEEK_END); + + m_size = std::ftell(fp); + + if (m_size < 0) + FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str()); + + m_buffer = new char[m_size + 1]; + + std::rewind(fp); + + if (std::fread(m_buffer, m_size, 1, fp) != 1) + FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str()); + + m_buffer[m_size] = 0; + + std::fclose(fp); + + m_pos = 0; + m_lineNum = 1; + + RemoveComments(); +} + +CharmapReader::~CharmapReader() +{ + delete[] m_buffer; +} + +Lhs CharmapReader::ReadLhs() +{ + Lhs lhs; + + for (;;) + { + SkipWhitespace(); + + if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else + { + break; + } + } + + if (m_buffer[m_pos] == '\'') + { + m_pos++; + + bool isEscape = (m_buffer[m_pos] == '\\'); + + if (isEscape) + { + m_pos++; + } + + unsigned char c = m_buffer[m_pos]; + + if (c == 0) + { + if (m_pos >= m_size) + RaiseError("unexpected EOF in UTF-8 character literal"); + else + RaiseError("unexpected null character in UTF-8 character literal"); + } + + if (IsAscii(c) && !IsAsciiPrintable(c)) + RaiseError("unexpected character U+%X in UTF-8 character literal", c); + + UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]); + std::int32_t code = unicodeChar.code; + + if (code == -1) + RaiseError("invalid encoding in UTF-8 character literal"); + + m_pos += unicodeChar.encodingLength; + + if (m_buffer[m_pos] != '\'') + RaiseError("unterminated character literal"); + + m_pos++; + + lhs.code = code; + + if (isEscape) + { + if (code >= 128) + RaiseError("escapes using non-ASCII characters are invalid"); + + switch (code) + { + case '\'': + lhs.type = LhsType::Char; + break; + case '\\': + lhs.type = LhsType::Char; + case '"': + RaiseError("cannot escape double quote"); + break; + default: + lhs.type = LhsType::Escape; + } + } + else + { + if (code == '\'') + RaiseError("empty character literal"); + + lhs.type = LhsType::Char; + } + } + else if (IsIdentifierStartingChar(m_buffer[m_pos])) + { + lhs.type = LhsType::Constant; + lhs.name = ReadConstant(); + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + lhs.type = LhsType::None; + } + else + { + RaiseError("junk at start of line"); + } + + return lhs; +} + +void CharmapReader::ExpectEqualsSign() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] != '=') + RaiseError("expected equals sign"); + + m_pos++; +} + +static unsigned int ConvertHexDigit(char c) +{ + unsigned int digit = 0; + + if (c >= '0' && c <= '9') + digit = c - '0'; + else if (c >= 'A' && c <= 'F') + digit = 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + digit = 10 + c - 'a'; + + return digit; +} + +std::string CharmapReader::ReadSequence() +{ + SkipWhitespace(); + + long startPos = m_pos; + + unsigned int length = 0; + + while (IsAsciiHexDigit(m_buffer[m_pos]) && IsAsciiHexDigit(m_buffer[m_pos + 1])) + { + m_pos += 2; + length++; + + if (length > kMaxCharmapSequenceLength) + RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength); + + SkipWhitespace(); + } + + if (IsAsciiHexDigit(m_buffer[m_pos])) + RaiseError("each byte must have 2 hex digits"); + + if (length == 0) + RaiseError("expected byte sequence"); + + std::string sequence; + sequence.reserve(length); + + m_pos = startPos; + + for (unsigned int i = 0; i < length; i++) + { + unsigned int digit1 = ConvertHexDigit(m_buffer[m_pos]); + unsigned int digit2 = ConvertHexDigit(m_buffer[m_pos + 1]); + unsigned char byte = digit1 * 16 + digit2; + sequence += byte; + + m_pos += 2; + SkipWhitespace(); + } + + return sequence; +} + +void CharmapReader::ExpectEmptyRestOfLine() +{ + SkipWhitespace(); + + if (m_buffer[m_pos] == 0) + { + if (m_pos < m_size) + RaiseError("unexpected null character"); + } + else if (m_buffer[m_pos] == '\n') + { + m_pos++; + m_lineNum++; + } + else if (m_buffer[m_pos] == '\r') + { + RaiseError("only Unix-style LF newlines are supported"); + } + else + { + RaiseError("junk at end of line"); + } +} + +void CharmapReader::RaiseError(const char* format, ...) +{ + const int bufferSize = 1024; + char buffer[bufferSize]; + + std::va_list args; + va_start(args, format); + std::vsnprintf(buffer, bufferSize, format, args); + va_end(args); + + std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, buffer); + + std::exit(1); +} + +void CharmapReader::RemoveComments() +{ + long pos = 0; + bool inString = false; + + for (;;) + { + if (m_buffer[pos] == 0) + return; + + if (inString) + { + if (m_buffer[pos] == '\\' && m_buffer[pos + 1] == '\'') + { + pos += 2; + } + else + { + if (m_buffer[pos] == '\'') + inString = false; + pos++; + } + } + else if (m_buffer[pos] == '@') + { + while (m_buffer[pos] != '\n' && m_buffer[pos] != 0) + m_buffer[pos++] = ' '; + } + else + { + if (m_buffer[pos] == '\'') + inString = true; + pos++; + } + } +} + +std::string CharmapReader::ReadConstant() +{ + long startPos = m_pos; + + while (IsIdentifierChar(m_buffer[m_pos])) + m_pos++; + + return std::string(&m_buffer[startPos], m_pos - startPos); +} + +void CharmapReader::SkipWhitespace() +{ + while (m_buffer[m_pos] == '\t' || m_buffer[m_pos] == ' ') + m_pos++; +} + +Charmap::Charmap(std::string filename) +{ + CharmapReader reader(filename); + + for (;;) + { + Lhs lhs = reader.ReadLhs(); + + if (lhs.type == LhsType::None) + return; + + reader.ExpectEqualsSign(); + + std::string sequence = reader.ReadSequence(); + + switch (lhs.type) + { + case LhsType::Char: + if (m_chars.find(lhs.code) != m_chars.end()) + reader.RaiseError("redefining char"); + m_chars[lhs.code] = sequence; + break; + case LhsType::Escape: + if (m_escapes[lhs.code].length() != 0) + reader.RaiseError("redefining escape"); + m_escapes[lhs.code] = sequence; + break; + case LhsType::Constant: + if (m_constants.find(lhs.name) != m_constants.end()) + reader.RaiseError("redefining constant"); + m_constants[lhs.name] = sequence; + break; + } + + reader.ExpectEmptyRestOfLine(); + } +} |