summaryrefslogtreecommitdiff
path: root/toc.py
diff options
context:
space:
mode:
author Rangi42 <remy.oukaour+rangi42@gmail.com> 2018-02-26 11:28:46 -0500
committer Rangi42 <remy.oukaour+rangi42@gmail.com> 2018-02-26 11:28:46 -0500
commit 83af28e32f2bfcde9210c06bc5eb6466f92e4aa2 (patch)
tree 7f5a34575b27d4d8f871a373ac1b4978f8657497 /toc.py
parent aaedef5dd3edff15a809b4c3e0f9c4e6eaa310c5 (diff)
Add more tables of contents, and commit the script used
Diffstat (limited to 'toc.py')
-rw-r--r-- toc.py 89
1 files changed, 89 insertions, 0 deletions
diff --git a/toc.py b/toc.py
new file mode 100644
index 0000000..9bdc8cc
--- /dev/null
+++ b/toc.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Usage: python3 toc.py [-n] files.md...
+Replace a "## TOC" heading in a Markdown file with a table of contents,
+generated from the other headings in the file. Supports multiple files.
+Headings must start with "##" signs to be detected.
+"""
+
+import sys
+import re
+from collections import namedtuple
+
# Title of the heading that is written in place of the TOC placeholder.
toc_name = 'Contents'
# Placeholder lines recognised as "insert the table of contents here"
# (lines are compared after their trailing whitespace is stripped).
valid_toc_headings = {'##TOC', '## TOC'}

# One table-of-contents entry: the heading text, its link anchor, and its
# nesting depth (0 for a "##" heading, 1 for "###", and so on).
TocItem = namedtuple('TocItem', 'name anchor level')
# Runs of characters that are neither word characters, hyphens nor spaces;
# these are dropped when a heading is turned into an anchor.
punctuation_regexp = re.compile(r'[^\w\- ]+')
+
def name_to_anchor(name):
    """Return the link anchor for the heading text *name*.

    Follows the steps of GitHub's heading-anchor algorithm
    (https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb):
    surrounding whitespace is trimmed, the text is lowercased, every
    character matched by ``punctuation_regexp`` is removed, and each
    remaining space becomes a dash.
    """
    lowered = name.strip().lower()
    without_punctuation = punctuation_regexp.sub('', lowered)
    return without_punctuation.replace(' ', '-')
+
def get_toc_index(lines):
    """Return the index of the first TOC placeholder line in *lines*.

    A line is a placeholder when, with trailing whitespace removed, it is
    one of ``valid_toc_headings``. Returns ``None`` if there is no such line.
    """
    placeholder_indexes = (
        index
        for index, text in enumerate(lines)
        if text.rstrip() in valid_toc_headings
    )
    # Only the first match matters; None signals "no placeholder found".
    return next(placeholder_indexes, None)
+
def get_toc_items(lines, toc_index):
    """Yield a TocItem for each heading that belongs in the table of contents.

    Args:
        lines: The lines of the Markdown file.
        toc_index: Index of the TOC placeholder line within *lines*.

    Yields:
        One ``TocItem(name, anchor, level)`` per line after *toc_index* that
        starts with "##", where ``level`` is the number of leading "#" signs
        minus two.

    Differences from a plain "every line starting with ##" scan:

    * Lines inside fenced code blocks (``` or ~~~) are ignored, so "##"
      comments in code samples are not mistaken for headings.
    * Headings with no text are skipped instead of producing an empty link.
    * Repeated anchors get the "-1", "-2", ... suffixes that GitHub appends
      to later duplicates. To keep those numbers right, every line starting
      with "#" is counted, including headings before the placeholder and
      the placeholder itself (which is rewritten as "## <toc_name>").
    """
    anchor_counts = {}  # anchor -> number of headings seen so far using it
    fence_char = None   # '`' or '~' while inside a fenced code block
    fence_len = 0       # length of the fence that opened the current block
    for i, line in enumerate(lines):
        stripped = line.strip()
        if fence_char is not None:
            # A closing fence consists only of the opening fence character
            # and is at least as long as the opening fence.
            if len(stripped) >= fence_len and not stripped.strip(fence_char):
                fence_char = None
            continue
        if stripped.startswith(('```', '~~~')):
            fence_char = stripped[0]
            fence_len = len(stripped) - len(stripped.lstrip(fence_char))
            continue
        if not line.startswith('#'):
            continue
        text = line.lstrip('#')
        hashes = len(line) - len(text)
        # The placeholder line will be replaced by the TOC's own heading.
        name = toc_name if i == toc_index else text.strip()
        if not name:
            continue
        anchor = name_to_anchor(name)
        repeats = anchor_counts.get(anchor, 0)
        anchor_counts[anchor] = repeats + 1
        if repeats:
            anchor = '%s-%d' % (anchor, repeats)
        if i > toc_index and hashes >= 2:
            yield TocItem(name, anchor, hashes - 2)
+
def toc_string(toc_items, title=None, indent='  '):
    """Render *toc_items* as a Markdown heading followed by a nested list.

    Args:
        toc_items: Iterable of ``(name, anchor, level)`` entries.
        title: Text of the heading placed above the list; defaults to the
            module-level ``toc_name``.
        indent: Padding repeated once per nesting level. Two spaces is the
            smallest indent that Markdown treats as nesting under a "- "
            list item; a single space per level leaves level-1 entries as
            siblings of their parent.

    Returns:
        The heading, a blank line, and one ``- [name](#anchor)`` line per
        entry, joined with newlines and ending with a newline.
    """
    heading = toc_name if title is None else title
    lines = ['## %s' % heading, '']
    lines.extend(
        '%s- [%s](#%s)' % (indent * level, name, anchor)
        for name, anchor, level in toc_items
    )
    return '\n'.join(lines) + '\n'
+
def add_toc(filename):
    """Replace the TOC placeholder in the UTF-8 file *filename* in place.

    Returns:
        ``None`` if the file has no TOC placeholder line, ``False`` if no
        headings were found for the table of contents (the file is left
        untouched in both cases), or ``True`` once the file has been
        rewritten with the placeholder replaced by the generated table.
    """
    with open(filename, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    toc_index = get_toc_index(lines)
    if toc_index is None:
        return None  # no TOC heading

    toc_items = list(get_toc_items(lines, toc_index))
    if not toc_items:
        return False  # no content headings

    # Swap the placeholder line for the rendered table; every other line
    # is written back unchanged.
    lines[toc_index] = toc_string(toc_items)
    with open(filename, 'w', encoding='utf-8') as target:
        target.writelines(lines)
    return True  # OK
+
def main():
    """Add a table of contents to every file named on the command line.

    Prints each filename followed by ``OK``, a warning, or an error.
    Exits with status 1 when no filenames are given or when any file could
    not be read or written; a failing file no longer prevents the remaining
    files from being processed.
    """
    # NOTE(review): the usage text advertises a "-n" option, but there is no
    # option parsing; every argument is treated as a filename.
    filenames = sys.argv[1:]
    if not filenames:
        print('*** ERROR: No filenames specified')
        print(__doc__)
        # sys.exit rather than the site-provided exit() helper, which is
        # intended for interactive use and is not always available.
        sys.exit(1)

    failed = False
    for filename in filenames:
        print(filename)
        try:
            result = add_toc(filename)
        except (OSError, UnicodeDecodeError) as err:
            # Missing/unreadable file or content that is not valid UTF-8.
            print('*** ERROR: %s' % err)
            failed = True
            continue
        if result is None:
            print('*** WARNING: No "## TOC" heading found')
        elif result is False:
            print('*** WARNING: No content headings found')
        else:
            print('OK')

    if failed:
        sys.exit(1)
+
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()