summaryrefslogtreecommitdiff
path: root/utils/read_charmap.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils/read_charmap.py')
-rw-r--r--utils/read_charmap.py45
1 files changed, 45 insertions, 0 deletions
diff --git a/utils/read_charmap.py b/utils/read_charmap.py
new file mode 100644
index 0000000..77036f6
--- /dev/null
+++ b/utils/read_charmap.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import os, io
+from re import compile
+from sys import stderr
+
+charmap_regex = compile('[ \t]*charmap[ \t]+"(.*?)",[ \t]*(\$[0-9A-Fa-f]{2}|%[01]{8}|[0-9]{3})')
+# A charmap line is
+# [ \t]* - zero or more space chars
+# charmap - literal charmap
+# [ \t]+ - one or more space chars
+# "(.*?)" - a lazily-matched text identifier in quotes
+# , - literal comma
+# [ \t]* - zero or more space chars
+# ( - either of
+# \$[0-9A-Fa-f]{2} - two hexadecimal digits preceeded by literal $
+# %[01]{8} - eight dual digits preceeded by literal %
+# [0-9]{3} - three decimal digits
+# )
+
+def parse_int(s):
+ # assumes integers are literal; no +-*/, etc
+ s = s.strip()
+ if s.startswith('$'):
+ return int(s[1:], 16)
+ if s.startswith('%'):
+ return int(s[1:], 2)
+ return int(s)
+
+def read_charmap(charmap_path):
+ charmap = {}
+ with io.open(charmap_path, 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ for line in lines:
+ m = charmap_regex.match(line)
+ if m is None:
+ continue
+ char = m.group(1)
+ value = parse_int(m.group(2))
+ if value in charmap:
+ print('Value {0:s} already in charmap, dropping it in favor of first charmap entry'.format(m.group(2)))
+ continue
+ charmap[value] = char
+ return charmap