aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/catch2/tools/scripts/updateDocumentToC.py
diff options
context:
space:
mode:
authorLexi Winter <lexi@le-fay.org>2025-06-29 19:28:09 +0100
committerLexi Winter <lexi@le-fay.org>2025-06-29 19:28:09 +0100
commit67b2fae1fa8b033045a44c1355d9dfd8f83e0d9b (patch)
tree1ecd818f4bcf7d12622d43dc92c4d4bb9b746d0f /contrib/catch2/tools/scripts/updateDocumentToC.py
parenta8b0ea58e60bb0326b7f7c8f3c736d89ce9ef1df (diff)
parentbc524d70253a4ab2fe40c3ca3e5666e267c0a4d1 (diff)
downloadnihil-67b2fae1fa8b033045a44c1355d9dfd8f83e0d9b.tar.gz
nihil-67b2fae1fa8b033045a44c1355d9dfd8f83e0d9b.tar.bz2
Add 'contrib/catch2/' from commit 'bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1'
git-subtree-dir: contrib/catch2 git-subtree-mainline: a8b0ea58e60bb0326b7f7c8f3c736d89ce9ef1df git-subtree-split: bc524d70253a4ab2fe40c3ca3e5666e267c0a4d1
Diffstat (limited to 'contrib/catch2/tools/scripts/updateDocumentToC.py')
-rwxr-xr-xcontrib/catch2/tools/scripts/updateDocumentToC.py447
1 files changed, 447 insertions, 0 deletions
diff --git a/contrib/catch2/tools/scripts/updateDocumentToC.py b/contrib/catch2/tools/scripts/updateDocumentToC.py
new file mode 100755
index 0000000..1840cec
--- /dev/null
+++ b/contrib/catch2/tools/scripts/updateDocumentToC.py
@@ -0,0 +1,447 @@
+#!/usr/bin/env python3
+
+#
+# updateDocumentToC.py
+#
+# Insert table of contents at top of Catch markdown documents.
+#
+# This script is distributed under the GNU General Public License v3.0
+#
+# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
+# https://github.com/rasbt/markdown-toclify
+#
+
+import argparse
+import glob
+import os
+import re
+import sys
+
+from scriptCommon import catchPath
+
# Configuration:

# Default for --min-toc-entries: a document with fewer collected headlines
# than this gets no table of contents.
minTocEntries = 4

# Heading levels that are left out of the table of contents.
# Both lists exclude levels 1, 3, 4 and 5, so in practice the level 2
# headers are listed (level 6 is not excluded either).
headingExcludeDefault = [1,3,4,5]  # every document except the release notes
headingExcludeRelease = [1,3,4,5]  # release-notes.md

# Glob pattern of the documents processed when no file is given.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# File name (without directory) that selects headingExcludeRelease.
releaseNotesName = 'release-notes.md'

# The table of contents starts with contentTitle on line contentLineNo
# (1-based) of a document; contentLineNdx is the matching 0-based index.
contentTitle = '**Contents**'
contentLineNo = 4
contentLineNdx = contentLineNo - 1

# End configuration

# Characters that are kept when a headline is turned into an anchor slug.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
+
def readLines(in_file):
    """
    Reads the markdown file in_file and returns its text split on '\\n'.

    A file that ends with a newline therefore yields a trailing
    empty string as last list element.
    """
    with open(in_file, 'r') as source:
        text = source.read()
    return text.split('\n')
+
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """
    Returns the lines that do not start with any of the given prefixes.

    Keyword arguments:
        lines: list of strings, one per line of the document.
        remove: prefix or tuple of prefixes (as accepted by str.startswith);
            by default the [back to top] links and the mk-toclify
            anchor tags. If empty, a plain copy of lines is returned.
    """
    if not remove:
        return lines[:]

    return [line for line in lines if not line.startswith(remove)]
+
def removeToC(lines):
    """
    Removes an existing table of contents starting at index contentLineNdx.

    The table of contents is recognised by a line at index contentLineNdx
    that starts with contentTitle, followed by any number of entry lines
    starting with '['. A single blank separator line directly after the
    entries is removed as well.

    Keyword arguments:
        lines: list of strings, one per line of the document.

    Returns a new list; lines itself is not modified. If the document has
    no table of contents (or is too short to have one) a copy is returned.
    """
    # Documents shorter than contentLineNo lines cannot hold a ToC.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    # Skip the ToC entries; stop at end of document if the ToC is the
    # last thing in the file.
    pos = contentLineNdx + 1
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # Drop the separator after the entries, but only when it really is
    # blank, so that no document content is lost.
    if pos < len(lines) and not lines[pos].strip():
        pos += 1

    return lines[:contentLineNdx] + lines[pos:]
+
def dashifyHeadline(line):
    """
    Takes a header line from a Markdown document and
    returns a list of
        the headline text without '#' markers,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.
    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]

    A list (not a tuple) is returned so that the level can be
    adjusted in place later on.
    """
    # Strip surrounding whitespace first, so that an indented headline or
    # trailing blanks do not hide the '#' markers.
    trimmed = line.strip()

    # The level is the number of opening '#'.
    text = trimmed.lstrip('#')
    level = len(trimmed) - len(text)

    # An optional closing '#' run only counts when it is separated from the
    # text by whitespace, so that a headline ending in 'C#' keeps its '#'.
    without_closing = text.rstrip('#')
    if without_closing != text and (
            not without_closing or without_closing[-1].isspace()):
        text = without_closing

    stripped_wspace = text.strip()

    # GitHub's sluggification works in an interesting way
    # 1) '+', '/', '(', ')' and so on are just removed
    # 2) spaces are converted into '-' directly
    # 3) multiple -- are not collapsed
    dashified = ''.join(
        '-' if c.isspace() else c.lower()
        for c in stripped_wspace
        if c in VALIDS or c.isspace())

    return [stripped_wspace, dashified, level]
+
def _isHeadline(line):
    """Returns True if line is a non-empty '#'-style Markdown headline."""
    stripped = line.lstrip()

    # one to six '#' followed by a space
    if not stripped.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')):
        return False
    # an indented line has to start with a space character
    # (e.g. a leading tab is rejected)
    if not line.lstrip('#').startswith(' '):
        return False
    # headers can be indented by at most 3 spaces
    if len(line) - len(stripped) > 3:
        return False
    # ignore empty headers
    if not set(line) - {'#', ' '}:
        return False

    return True

def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Every input line is passed through to the output; lines that merely
    look like a headline (too deeply indented, empty header, ...) are
    kept unchanged and are not collected.

    Keyword arguments:
        lines: a list of strings where every string
            represents a line from a Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A modified version of the input list where
            <a id="some-header"></a> anchor tags were inserted
            above the collected header lines (if id_tag is true).

        2nd list:
            A list of 3-value sublists, where the first value
            represents the heading, the second value the string
            that was assigned to the IDs in the anchor tags,
            and the third value is an integer that represents the headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    """
    out_contents = []
    headlines = []
    for l in lines:
        saw_headline = _isHeadline(l)

        if saw_headline:
            # Pass the line without its indentation, so that the level is
            # derived from the '#' markers.
            dashified = dashifyHeadline(l.lstrip())

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
+
def positioningHeadlines(headlines):
    """
    Shifts all headline levels one step to the left if the list
    contains no level 1 headline.

    Keyword arguments:
        headlines: list of [text, anchor, level] lists; the level
            (last element) is decremented in place.

    Returns the same list object that was passed in.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if has_top_level:
        return headlines

    for entry in headlines:
        entry[-1] -= 1
    return headlines
+
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Builds the lines of the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: if True, every entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)<br>';
            otherwise a '- ' bullet indented according to the level.
        top_link: if True, add a id tag for linking the table
            of contents itself (for the back-to-top-links)
        no_toc_header: suppresses TOC header if True.

    Returns a list of lines: the optional header lines, one entry per
    headline (each ending in '<br>'), and a final '\\n' element.

    """
    toc = []
    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    if hyperlink:
        entries = ['[%s](#%s)' % (h[0], h[1]) for h in headlines]
    else:
        entries = ['%s- %s' % ((h[2]-1)*' ', h[0]) for h in headlines]

    toc.extend(entry + '<br>' for entry in entries)
    toc.append('\n')
    return toc
+
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Returns a string with the Markdown output contents incl.
    the table of contents.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: if non-zero, a <div> with this height in pixels
            is appended to the table of contents.
        placeholder: if a placeholder string is provided, every occurrence
            of it in the body is replaced by the TOC; otherwise the TOC
            is inserted in front of the body line at index contentLineNdx.

    """
    toc_lines = toc_headlines
    if spacer:
        toc_lines = toc_headlines + ['\n<div style="height:%spx;"></div>\n' % (spacer)]
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    head = "\n".join(body[:contentLineNdx]) + '\n'
    tail = "\n".join(body[contentLineNdx:])
    return head + toc_markdown + tail
+
def outputMarkdown(markdown_cont, output_file):
    """
    Writes markdown_cont to output_file, replacing any existing content.

    Does nothing if output_file is empty or None.

    """
    if not output_file:
        return

    with open(output_file, 'w') as sink:
        sink.write(markdown_cont)
+
def markdownToclify(
        input_file,
        output_file=None,
        min_toc_len=2,
        github=False,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=None):
    """ Function to add table of contents to markdown files.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if empty.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.

      github: bool (default: False)
        Uses GitHub TOC syntax if True (no <a id> anchor tags are inserted).

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated markdown differs from the content of
      input_file, False otherwise.

    """
    original_lines = readLines(input_file)

    # Start from a document without the artefacts of a previous run.
    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # Without links the anchor tags and back-to-top links are pointless,
    # so the untagged body is used.
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines splits on '\n', so joining restores the text that was read.
    return cont != '\n'.join(original_lines)
+
def isReleaseNotes(f):
    """Returns True if the file name of path f (directories ignored) equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
+
def excludeHeadingsFor(f):
    """Returns the list of heading levels to exclude from the table of contents of file f."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
+
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """
    Add or update table of contents in specified file.

    The new content is first written to '<input_file>.tmp'. The input file
    is only replaced when the new content differs from it; the temporary
    file never outlives this call.

    Return 1 if file changed, 0 otherwise.
    """
    if verbose :
        print( 'file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    try:
        markdownToclify(
            input_file=input_file,
            output_file=output_file,
            min_toc_len=min_toc_len,
            github=True,
            back_to_top=False,
            nolink=False,
            no_toc_header=False,
            spacer=False,
            placeholder=False,
            exclude_h=excludeHeadingsFor(input_file))

        # Compare the raw bytes so that an unchanged document is left
        # untouched and is not counted as changed.
        with open(input_file, 'rb') as before, open(output_file, 'rb') as after:
            changed = before.read() != after.read()

        if changed:
            # prevent race-condition (Python 3.3):
            if sys.version_info >= (3, 3):
                os.replace(output_file, input_file)
            else:
                os.remove(input_file)
                os.rename(output_file, input_file)
    finally:
        # Still present if nothing changed or if processing failed.
        if os.path.exists(output_file):
            os.remove(output_file)

    return 1 if changed else 0
+
def updateDocumentToC(paths, min_toc_len, verbose):
    """
    Add or update table of contents in all files matching the given paths.

    Keyword arguments:
        paths: iterable of glob patterns; matches that are not
            regular files are skipped.
        min_toc_len: passed on to updateSingleDocumentToC.
        verbose: passed on to updateSingleDocumentToC.

    Return the sum of the values returned by updateSingleDocumentToC.
    """
    return sum(
        updateSingleDocumentToC(input_file=candidate, min_toc_len=min_toc_len, verbose=verbose)
        for pattern in paths
        for candidate in glob.glob(pattern)
        if os.path.isfile(candidate))
+
def updateDocumentToCMain():
    """
    Add or update table of contents to specified paths.

    Command line entry point: parses sys.argv, processes the given files
    (or documentsDefault if none is given) and prints a summary line.
    """

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        formatter_class=argparse.RawTextHelpFormatter)

    # '*' rather than argparse.REMAINDER, so that options are recognised
    # no matter whether they are given before or after the file names.
    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs='*',
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{default}]'.format(default=minTocEntries))

    # Removing works by demanding more entries than any document can have:
    # the existing table of contents is stripped and no new one is created.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if args.Input else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
    else:
        print( "No table of contents added or updated" )

if __name__ == '__main__':
    updateDocumentToCMain()
+
+# end of file