summary | refs | log | tree | commit | diff
path: root/tools/toc.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/toc.py')
-rw-r--r-- tools/toc.py 98
1 files changed, 98 insertions, 0 deletions
diff --git a/tools/toc.py b/tools/toc.py
new file mode 100644
index 000000000..1d7a58cec
--- /dev/null
+++ b/tools/toc.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Usage: python3 toc.py [-n] files.md...
+Replace a "## TOC" heading in a Markdown file with a table of contents,
+generated from the other headings in the file. Supports multiple files.
+Use "-n" for numbered list items.
+Headings must start with "##" signs to be detected.
+"""
+
+import sys
+import re
+from collections import namedtuple
+
+toc_name = 'Contents'  # heading text emitted in place of the TOC placeholder
+valid_toc_headings = {'## TOC', '##TOC'}  # placeholder lines (compared after rstrip) marking where the TOC goes
+
+TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])  # one heading: display text, link anchor, depth (0 for "##")
+punctuation_regexp = re.compile(r'[^\w\- ]+')  # runs of anything that is not a word char, '-' or space
+
+def name_to_anchor(name):
+ # GitHub's algorithm for generating anchors from headings
+ # https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
+ anchor = name.strip().lower() # lowercase
+ anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation
+ anchor = anchor.replace(' ', '-') # replace spaces with dash
+ return anchor
+
+def get_toc_index(lines):
+ toc_index = None
+ for i, line in enumerate(lines):
+ if line.rstrip() in valid_toc_headings:
+ toc_index = i
+ break
+ return toc_index
+
+def get_toc_items(lines, toc_index):
+ for i, line in enumerate(lines):
+ if i <= toc_index:
+ continue
+ if line.startswith('##'):
+ name = line.lstrip('#')
+ level = len(line) - len(name) - len('##')
+ name = name.strip()
+ anchor = name_to_anchor(name)
+ yield TocItem(name, anchor, level)
+
+def toc_string(toc_items, numeric):
+ lines = ['## %s' % toc_name, '']
+ for name, anchor, level in toc_items:
+ padding = ' ' * level
+ line = '%s- [%s](#%s)' % (padding, name, anchor)
+ lines.append(line)
+ return '\n'.join(lines) + '\n'
+
+def add_toc(filename, numeric):
+ with open(filename, 'r', encoding='utf-8') as f:
+ lines = f.readlines()
+ toc_index = get_toc_index(lines)
+ if toc_index is None:
+ return None # no TOC heading
+ toc_items = list(get_toc_items(lines, toc_index))
+ if not toc_items:
+ return False # no content headings
+ with open(filename, 'w', encoding='utf-8') as f:
+ for i, line in enumerate(lines):
+ if i == toc_index:
+ f.write(toc_string(toc_items, numeric))
+ else:
+ f.write(line)
+ return True # OK
+
+def main():
+ if len(sys.argv) < 2:
+ print('*** ERROR: Not enough arguments')
+ print(__doc__)
+ exit(1)
+ del sys.argv[0]
+ numeric = False
+ if sys.argv[0] == '-n':
+ numeric = True
+ del sys.argv[0]
+ if not sys.argv:
+ print('*** ERROR: No filenames specified')
+ exit(1)
+ for filename in sys.argv:
+ print(filename)
+ result = add_toc(filename, numeric)
+ if result is None:
+ print('*** WARNING: No "## TOC" heading found')
+ elif result is False:
+ print('*** WARNING: No content headings found')
+ else:
+ print('OK')
+
+if __name__ == '__main__':
+ main()