summary | refs | log | tree | commit | diff
path: root/tools/preproc/charmap.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/preproc/charmap.cpp')
-rw-r--r-- tools/preproc/charmap.cpp 408
1 files changed, 408 insertions, 0 deletions
diff --git a/tools/preproc/charmap.cpp b/tools/preproc/charmap.cpp
new file mode 100644
index 0000000..a7bedfe
--- /dev/null
+++ b/tools/preproc/charmap.cpp
@@ -0,0 +1,408 @@
+// Copyright(c) 2016 YamaArashi
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <cstdio>
+#include <cstdint>
+#include <cstdarg>
+#include "preproc.h"
+#include "charmap.h"
+#include "char_util.h"
+#include "utf8.h"
+
+// Kind of entry found on the left-hand side of a charmap line.
+//
+// Declared as a scoped enum so that the enumerators (notably `Char` and
+// `None`) do not leak into the global namespace. Every use in this file is
+// already spelled `LhsType::X`, so no caller needs to change.
+enum class LhsType
+{
+    Char,     // character literal (including the escaped quote and backslash)
+    Escape,   // any other backslash escape inside a character literal
+    Constant, // identifier
+    None      // end of input, nothing left to read
+};
+
+// Parsed left-hand side of a charmap entry, as returned by
+// CharmapReader::ReadLhs.
+//
+// The scalar members carry default initializers so that a freshly declared
+// `Lhs` never holds indeterminate values when it is returned by value.
+struct Lhs
+{
+    LhsType type = LhsType::None; // tells which of the members below is meaningful
+    std::string name;             // identifier text; filled in for LhsType::Constant
+    std::int32_t code = 0;        // code point; filled in for LhsType::Char / LhsType::Escape
+};
+
+// Tokenizer for charmap files.
+//
+// The constructor loads the whole file into a heap buffer; the public methods
+// then consume one entry at a time in the order
+// ReadLhs -> ExpectEqualsSign -> ReadSequence -> ExpectEmptyRestOfLine.
+class CharmapReader
+{
+public:
+    explicit CharmapReader(std::string filename);
+
+    // The reader owns m_buffer through a raw pointer, so copying in either
+    // form would make two objects delete the same allocation.
+    CharmapReader(const CharmapReader&) = delete;
+    CharmapReader& operator=(const CharmapReader&) = delete;
+
+    ~CharmapReader();
+
+    // Skips blank lines and parses the left-hand side of the next entry.
+    Lhs ReadLhs();
+
+    // Skips spaces/tabs and consumes a single '='.
+    void ExpectEqualsSign();
+
+    // Parses one or more two-digit hex bytes and returns them as raw bytes.
+    std::string ReadSequence();
+
+    // Consumes the end of the current line (or accepts end of input).
+    void ExpectEmptyRestOfLine();
+
+    // Prints "<file>:<line>: error: <message>" to stderr and exits with
+    // status 1. printf-style arguments. Never returns.
+    [[noreturn]] void RaiseError(const char* format, ...);
+
+private:
+    char* m_buffer;         // file contents followed by a terminating 0
+    long m_pos;             // current read offset into m_buffer
+    long m_size;            // number of bytes read from the file
+    long m_lineNum;         // 1-based line number used in error messages
+    std::string m_filename; // name used in error messages
+
+    // Overwrites '@' comments with spaces.
+    void RemoveComments();
+
+    // Reads an identifier starting at m_pos.
+    std::string ReadConstant();
+
+    // Advances m_pos past spaces and tabs (not newlines).
+    void SkipWhitespace();
+};
+
+// Loads `filename` completely into a 0-terminated heap buffer, resets the
+// read position and line counter, and blanks out comments.
+//
+// Every I/O failure is reported through FATAL_ERROR (a project macro; it is
+// presumably fatal, as the code below does not attempt to recover).
+CharmapReader::CharmapReader(std::string filename) : m_filename(filename)
+{
+    FILE *fp = std::fopen(filename.c_str(), "rb");
+
+    if (fp == nullptr)
+        FATAL_ERROR("Failed to open \"%s\" for reading.\n", filename.c_str());
+
+    // A failed seek would make the ftell() result below meaningless.
+    if (std::fseek(fp, 0, SEEK_END) != 0)
+        FATAL_ERROR("Failed to seek in \"%s\".\n", filename.c_str());
+
+    m_size = std::ftell(fp);
+
+    if (m_size < 0)
+        FATAL_ERROR("File size of \"%s\" is less than zero.\n", filename.c_str());
+
+    // One extra byte for the terminating 0 that the parser relies on.
+    m_buffer = new char[m_size + 1];
+
+    std::rewind(fp);
+
+    // fread() returns 0 when asked for a zero-sized element, so an empty file
+    // must not go through the "!= 1" check or it is reported as a read error.
+    if (m_size > 0 && std::fread(m_buffer, m_size, 1, fp) != 1)
+        FATAL_ERROR("Failed to read \"%s\".\n", filename.c_str());
+
+    m_buffer[m_size] = 0;
+
+    std::fclose(fp);
+
+    m_pos = 0;
+    m_lineNum = 1;
+
+    RemoveComments();
+}
+
+CharmapReader::~CharmapReader() // Releases the buffer holding the file contents.
+{
+ delete[] m_buffer; // array form: the constructor allocates it with new char[...]
+}
+
+// Reads the left-hand side of the next charmap entry.
+//
+// Blank lines are skipped first. The result is one of:
+//   - LhsType::Char     : a character literal; `code` holds the code point.
+//                         The escapes \' and \\ are reported as plain chars.
+//   - LhsType::Escape   : any other backslash escape; `code` holds the
+//                         (ASCII) character that follows the backslash.
+//   - LhsType::Constant : an identifier; `name` holds its text.
+//   - LhsType::None     : end of input.
+// Malformed input is reported through RaiseError.
+Lhs CharmapReader::ReadLhs()
+{
+    Lhs lhs;
+    lhs.type = LhsType::None;
+    lhs.code = 0;
+
+    // Skip leading whitespace and empty lines, keeping the line count exact.
+    for (;;)
+    {
+        SkipWhitespace();
+
+        if (m_buffer[m_pos] == '\n')
+        {
+            m_pos++;
+            m_lineNum++;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    if (m_buffer[m_pos] == '\'')
+    {
+        m_pos++;
+
+        bool isEscape = (m_buffer[m_pos] == '\\');
+
+        if (isEscape)
+        {
+            m_pos++;
+        }
+
+        unsigned char c = m_buffer[m_pos];
+
+        if (c == 0)
+        {
+            // A 0 at m_size is the terminator added by the constructor;
+            // anywhere else it is a literal NUL byte in the file.
+            if (m_pos >= m_size)
+                RaiseError("unexpected EOF in UTF-8 character literal");
+            else
+                RaiseError("unexpected null character in UTF-8 character literal");
+        }
+
+        if (IsAscii(c) && !IsAsciiPrintable(c))
+            RaiseError("unexpected character U+%X in UTF-8 character literal", c);
+
+        UnicodeChar unicodeChar = DecodeUtf8(&m_buffer[m_pos]);
+        std::int32_t code = unicodeChar.code;
+
+        if (code == -1)
+            RaiseError("invalid encoding in UTF-8 character literal");
+
+        m_pos += unicodeChar.encodingLength;
+
+        if (m_buffer[m_pos] != '\'')
+            RaiseError("unterminated character literal");
+
+        m_pos++;
+
+        lhs.code = code;
+
+        if (isEscape)
+        {
+            if (code >= 128)
+                RaiseError("escapes using non-ASCII characters are invalid");
+
+            switch (code)
+            {
+            case '\'':
+                lhs.type = LhsType::Char;
+                break;
+            case '\\':
+                lhs.type = LhsType::Char;
+                // This break is required: without it an escaped backslash
+                // falls into the double-quote case below and is rejected.
+                break;
+            case '"':
+                RaiseError("cannot escape double quote");
+                break;
+            default:
+                lhs.type = LhsType::Escape;
+                break;
+            }
+        }
+        else
+        {
+            if (code == '\'')
+                RaiseError("empty character literal");
+
+            lhs.type = LhsType::Char;
+        }
+    }
+    else if (IsIdentifierStartingChar(m_buffer[m_pos]))
+    {
+        lhs.type = LhsType::Constant;
+        lhs.name = ReadConstant();
+    }
+    else if (m_buffer[m_pos] == '\r')
+    {
+        RaiseError("only Unix-style LF newlines are supported");
+    }
+    else if (m_buffer[m_pos] == 0)
+    {
+        if (m_pos < m_size)
+            RaiseError("unexpected null character");
+        lhs.type = LhsType::None;
+    }
+    else
+    {
+        RaiseError("junk at start of line");
+    }
+
+    return lhs;
+}
+
+// Skips spaces/tabs, then requires and consumes a single '='.
+// Anything else at that position is reported through RaiseError.
+void CharmapReader::ExpectEqualsSign()
+{
+    SkipWhitespace();
+
+    const bool foundEquals = (m_buffer[m_pos] == '=');
+
+    if (!foundEquals)
+        RaiseError("expected equals sign");
+
+    ++m_pos;
+}
+
+// Returns the numeric value (0-15) of an ASCII hex digit in either case.
+// Any other character yields 0.
+static unsigned int ConvertHexDigit(char c)
+{
+    if ('0' <= c && c <= '9')
+        return c - '0';
+
+    if ('A' <= c && c <= 'F')
+        return 10 + c - 'A';
+
+    if ('a' <= c && c <= 'f')
+        return 10 + c - 'a';
+
+    return 0;
+}
+
+// Parses the right-hand side of an entry: one or more bytes, each written as
+// exactly two hex digits, optionally separated by spaces/tabs.
+//
+// Returns the decoded bytes. Reports an error through RaiseError when the
+// sequence exceeds kMaxCharmapSequenceLength bytes, when a byte has only one
+// digit, or when no byte is present at all.
+std::string CharmapReader::ReadSequence()
+{
+    SkipWhitespace();
+
+    std::string bytes;
+    unsigned int byteCount = 0;
+
+    for (;;)
+    {
+        // A byte is only accepted when both of its digits are present.
+        if (!IsAsciiHexDigit(m_buffer[m_pos]))
+            break;
+        if (!IsAsciiHexDigit(m_buffer[m_pos + 1]))
+            break;
+
+        const unsigned int high = ConvertHexDigit(m_buffer[m_pos]);
+        const unsigned int low = ConvertHexDigit(m_buffer[m_pos + 1]);
+
+        m_pos += 2;
+        ++byteCount;
+
+        if (byteCount > kMaxCharmapSequenceLength)
+            RaiseError("byte sequence too long (max is %lu bytes)", kMaxCharmapSequenceLength);
+
+        const unsigned char value = high * 16 + low;
+        bytes += value;
+
+        SkipWhitespace();
+    }
+
+    // A lone trailing hex digit means the last byte was written with one digit.
+    if (IsAsciiHexDigit(m_buffer[m_pos]))
+        RaiseError("each byte must have 2 hex digits");
+
+    if (byteCount == 0)
+        RaiseError("expected byte sequence");
+
+    return bytes;
+}
+
+// Requires that nothing but spaces/tabs remains on the current line.
+// Consumes the terminating '\n' (bumping the line counter) or accepts end of
+// input; everything else is reported through RaiseError.
+void CharmapReader::ExpectEmptyRestOfLine()
+{
+    SkipWhitespace();
+
+    switch (m_buffer[m_pos])
+    {
+    case 0:
+        // Only the terminator at m_size counts as end of input.
+        if (m_pos < m_size)
+            RaiseError("unexpected null character");
+        break;
+    case '\n':
+        ++m_pos;
+        ++m_lineNum;
+        break;
+    case '\r':
+        RaiseError("only Unix-style LF newlines are supported");
+        break;
+    default:
+        RaiseError("junk at end of line");
+        break;
+    }
+}
+
+// Formats a printf-style message, prints it to stderr prefixed with the file
+// name and current line number, and terminates the process with status 1.
+// The formatted message is limited to the size of the local buffer.
+void CharmapReader::RaiseError(const char* format, ...)
+{
+    char message[1024];
+
+    std::va_list ap;
+    va_start(ap, format);
+    std::vsnprintf(message, sizeof(message), format, ap);
+    va_end(ap);
+
+    std::fprintf(stderr, "%s:%ld: error: %s\n", m_filename.c_str(), m_lineNum, message);
+
+    std::exit(1);
+}
+
+// Blanks out comments in the buffer: everything from an '@' up to, but not
+// including, the end of the line is overwritten with spaces. An '@' inside a
+// single-quoted character literal is left alone.
+//
+// Inside a literal, a backslash always escapes the character after it. The
+// pair is skipped as a unit so that in '\\' the second backslash is not
+// mistaken for the start of \' (which would swallow the closing quote and
+// leave the scanner "inside" the literal for the rest of the line).
+void CharmapReader::RemoveComments()
+{
+    long pos = 0;
+    bool inString = false;
+
+    for (;;)
+    {
+        if (m_buffer[pos] == 0)
+            return;
+
+        if (inString)
+        {
+            if (m_buffer[pos] == '\\' && m_buffer[pos + 1] != 0)
+            {
+                // Escape sequence: skip the backslash and the escaped
+                // character. The != 0 guard keeps pos from stepping over
+                // the buffer terminator.
+                pos += 2;
+            }
+            else
+            {
+                if (m_buffer[pos] == '\'')
+                    inString = false;
+                pos++;
+            }
+        }
+        else if (m_buffer[pos] == '@')
+        {
+            // Keep the '\n' itself so line numbering is unaffected.
+            while (m_buffer[pos] != '\n' && m_buffer[pos] != 0)
+                m_buffer[pos++] = ' ';
+        }
+        else
+        {
+            if (m_buffer[pos] == '\'')
+                inString = true;
+            pos++;
+        }
+    }
+}
+
+// Consumes the run of identifier characters starting at the current position
+// and returns it as a string.
+std::string CharmapReader::ReadConstant()
+{
+    const long begin = m_pos;
+
+    for (; IsIdentifierChar(m_buffer[m_pos]); ++m_pos)
+    {
+    }
+
+    return std::string(m_buffer + begin, m_pos - begin);
+}
+
+// Advances past any run of tabs and spaces. Newlines are not skipped.
+void CharmapReader::SkipWhitespace()
+{
+    for (;;)
+    {
+        const char current = m_buffer[m_pos];
+
+        if (current != '\t' && current != ' ')
+            break;
+
+        ++m_pos;
+    }
+}
+
+// Builds the charmap from `filename`. Each line has the form
+// "<lhs> = <hex bytes>"; the entry is stored in the char, escape or constant
+// table depending on the kind of left-hand side. Defining the same key twice
+// is reported through the reader's RaiseError.
+Charmap::Charmap(std::string filename)
+{
+    CharmapReader reader(filename);
+
+    // ReadLhs yields LhsType::None once the input is exhausted.
+    for (Lhs entry = reader.ReadLhs(); entry.type != LhsType::None; entry = reader.ReadLhs())
+    {
+        reader.ExpectEqualsSign();
+
+        const std::string bytes = reader.ReadSequence();
+
+        if (entry.type == LhsType::Char)
+        {
+            if (m_chars.find(entry.code) != m_chars.end())
+                reader.RaiseError("redefining char");
+            m_chars[entry.code] = bytes;
+        }
+        else if (entry.type == LhsType::Escape)
+        {
+            // An empty slot means the escape has not been defined yet.
+            if (m_escapes[entry.code].length() != 0)
+                reader.RaiseError("redefining escape");
+            m_escapes[entry.code] = bytes;
+        }
+        else if (entry.type == LhsType::Constant)
+        {
+            if (m_constants.find(entry.name) != m_constants.end())
+                reader.RaiseError("redefining constant");
+            m_constants[entry.name] = bytes;
+        }
+
+        reader.ExpectEmptyRestOfLine();
+    }
+}