Add more tables of contents, and commit the script used

author: Rangi42 <remy.oukaour+rangi42@gmail.com> 2018-02-26 11:28:46 -0500
committer: Rangi42 <remy.oukaour+rangi42@gmail.com> 2018-02-26 11:28:46 -0500
commit: 83af28e32f2bfcde9210c06bc5eb6466f92e4aa2 (patch)
tree: 7f5a34575b27d4d8f871a373ac1b4978f8657497 /toc.py
parent: aaedef5dd3edff15a809b4c3e0f9c4e6eaa310c5 (diff)
1 files changed, 89 insertions, 0 deletions
diff --git a/toc.py b/toc.py
new file mode 100644
index 0000000..9bdc8cc
--- /dev/null
+++ b/toc.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Usage: python3 toc.py [-n] files.md...
+Replace a "## TOC" heading in a Markdown file with a table of contents,
+generated from the other headings in the file. Supports multiple files.
+Headings must start with "##" signs to be detected.
+"""
+
+import sys
+import re
+from collections import namedtuple
+
+toc_name = 'Contents'
+valid_toc_headings = {'## TOC', '##TOC'}
+
+TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])
+punctuation_regexp = re.compile(r'[^\w\- ]+')
+
+def name_to_anchor(name):
+	# GitHub's algorithm for generating anchors from headings
+	# https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
+	anchor = name.strip().lower()                   # lowercase
+	anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation
+	anchor = anchor.replace(' ', '-')               # replace spaces with dash
+	return anchor
+
+def get_toc_index(lines):
+	toc_index = None
+	for i, line in enumerate(lines):
+		if line.rstrip() in valid_toc_headings:
+			toc_index = i
+			break
+	return toc_index
+
+def get_toc_items(lines, toc_index):
+	for i, line in enumerate(lines):
+		if i <= toc_index:
+			continue
+		if line.startswith('##'):
+			name = line.lstrip('#')
+			level = len(line) - len(name) - len('##')
+			name = name.strip()
+			anchor = name_to_anchor(name)
+			yield TocItem(name, anchor, level)
+
+def toc_string(toc_items):
+	lines = ['## %s' % toc_name, '']
+	for name, anchor, level in toc_items:
+		padding = '  ' * level
+		line = '%s- [%s](#%s)' % (padding, name, anchor)
+		lines.append(line)
+	return '\n'.join(lines) + '\n'
+
+def add_toc(filename):
+	with open(filename, 'r', encoding='utf-8') as f:
+		lines = f.readlines()
+	toc_index = get_toc_index(lines)
+	if toc_index is None:
+		return None # no TOC heading
+	toc_items = list(get_toc_items(lines, toc_index))
+	if not toc_items:
+		return False # no content headings
+	with open(filename, 'w', encoding='utf-8') as f:
+		for i, line in enumerate(lines):
+			if i == toc_index:
+				f.write(toc_string(toc_items))
+			else:
+				f.write(line)
+	return True # OK
+
+def main():
+	if len(sys.argv) < 2:
+		print('*** ERROR: No filenames specified')
+		print(__doc__)
+		exit(1)
+	for filename in sys.argv[1:]:
+		print(filename)
+		result = add_toc(filename)
+		if result is None:
+			print('*** WARNING: No "## TOC" heading found')
+		elif result is False:
+			print('*** WARNING: No content headings found')
+		else:
+			print('OK')
+
+if __name__ == '__main__':
+	main()
author	Rangi42 <remy.oukaour+rangi42@gmail.com>	2018-02-26 11:28:46 -0500
committer	Rangi42 <remy.oukaour+rangi42@gmail.com>	2018-02-26 11:28:46 -0500
commit	83af28e32f2bfcde9210c06bc5eb6466f92e4aa2 (patch)
tree	7f5a34575b27d4d8f871a373ac1b4978f8657497 /toc.py
parent	aaedef5dd3edff15a809b4c3e0f9c4e6eaa310c5 (diff)