From 83af28e32f2bfcde9210c06bc5eb6466f92e4aa2 Mon Sep 17 00:00:00 2001 From: Rangi42 Date: Mon, 26 Feb 2018 11:28:46 -0500 Subject: Add more tables of contents, and commit the script used --- toc.py | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 toc.py (limited to 'toc.py') diff --git a/toc.py b/toc.py new file mode 100644 index 0000000..9bdc8cc --- /dev/null +++ b/toc.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Usage: python3 toc.py [-n] files.md... +Replace a "## TOC" heading in a Markdown file with a table of contents, +generated from the other headings in the file. Supports multiple files. +Headings must start with "##" signs to be detected. +""" + +import sys +import re +from collections import namedtuple + +toc_name = 'Contents' +valid_toc_headings = {'## TOC', '##TOC'} + +TocItem = namedtuple('TocItem', ['name', 'anchor', 'level']) +punctuation_regexp = re.compile(r'[^\w\- ]+') + +def name_to_anchor(name): + # GitHub's algorithm for generating anchors from headings + # https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb + anchor = name.strip().lower() # lowercase + anchor = re.sub(punctuation_regexp, '', anchor) # remove punctuation + anchor = anchor.replace(' ', '-') # replace spaces with dash + return anchor + +def get_toc_index(lines): + toc_index = None + for i, line in enumerate(lines): + if line.rstrip() in valid_toc_headings: + toc_index = i + break + return toc_index + +def get_toc_items(lines, toc_index): + for i, line in enumerate(lines): + if i <= toc_index: + continue + if line.startswith('##'): + name = line.lstrip('#') + level = len(line) - len(name) - len('##') + name = name.strip() + anchor = name_to_anchor(name) + yield TocItem(name, anchor, level) + +def toc_string(toc_items): + lines = ['## %s' % toc_name, ''] + for name, anchor, level in toc_items: + padding = ' ' * level + line = '%s- [%s](#%s)' % (padding, name, anchor) + lines.append(line) + return '\n'.join(lines) + '\n' + +def add_toc(filename): + with open(filename, 'r', encoding='utf-8') as f: + lines = f.readlines() + toc_index = get_toc_index(lines) + if toc_index is None: + return None # no TOC heading + toc_items = list(get_toc_items(lines, toc_index)) + if not toc_items: + return False # no content headings + with open(filename, 'w', encoding='utf-8') as f: + for i, line in enumerate(lines): + if i == toc_index: + f.write(toc_string(toc_items)) + else: + f.write(line) + return True # OK + +def main(): + if len(sys.argv) < 2: + print('*** ERROR: No filenames specified') + print(__doc__) + exit(1) + for filename in sys.argv[1:]: + print(filename) + result = add_toc(filename) + if result is None: + print('*** WARNING: No "## TOC" heading found') + elif result is False: + print('*** WARNING: No content headings found') + else: + print('OK') + +if __name__ == '__main__': + main() -- cgit v1.2.3