diff options
-rw-r--r-- | convert-sphinx.py | 79 | ||||
-rwxr-xr-x | update-docs.sh | 13 |
2 files changed, 92 insertions, 0 deletions
diff --git a/convert-sphinx.py b/convert-sphinx.py new file mode 100644 index 00000000000..a260c200081 --- /dev/null +++ b/convert-sphinx.py @@ -0,0 +1,79 @@ +import os, glob +from bs4 import BeautifulSoup +from subprocess import run, PIPE +from urllib.parse import urlparse +import dashtable + +def html_to_rst(input): + return run(['pandoc', '-f', 'html', '-t', 'rst'], + input=input, stdout=PIPE, universal_newlines=True).stdout + +def convert_toc(filename): + with open(filename, encoding='utf8') as input: + soup = BeautifulSoup(input, 'html5lib') + body = soup.find('body') + with open('./docs/contents.rst', 'w', encoding='utf-8') as output: + for elm in body.contents: + if elm.name == 'h2': + output.write(""".. toctree:: + :maxdepth: 1 + :caption: {0} + :hidden:\n""".format(elm.get_text())) + elif elm.name == 'ul': + output.write('\n') + for li in elm.contents: + if li.name == 'li': + a = li.find('a') + url = a['href'] + if url == 'index.html': + output.write(' self\n') + elif bool(urlparse(url).netloc): + output.write(' {0} <{1}>\n'.format(a.get_text(), url)) + else: + output.write(' {0}\n'.format(url[:-5])) + output.write('\n') + elif elm.name == 'dl': + a = elm.find('a') + output.write('\n {0} <{1}>\n'.format(a.get_text(), url)) + elif hasattr(elm, 'contents'): + print('**** UNKNOWN: ' + str(elm)) + exit(1) + print("SUCCESS: " + filename) + +def convert_article(filename): + with open(filename, encoding='utf8') as input: + soup = BeautifulSoup(input, 'html5lib') + + table = None + if filename == './docs/release-calendar.html': + table = dashtable.html2rst(str(soup.table.extract())) + + content = soup.find('div', 'content') + content = ''.join(map(str, content.contents)) + content = html_to_rst(str(content)) + + if table: + content = '\n'.join([content, table, '']) + + with open(os.path.splitext(filename)[0]+'.rst', 'w', encoding='utf-8') as output: + output.write(str(content)) + if filename == './docs/relnotes.html': + output.write("""\n.. toctree:: + :maxdepth: 1 + :hidden:\n""") + output.write('\n') + for li in soup.findAll('li'): + a = li.find('a') + url = a['href'] + split = os.path.splitext(url) + if split[1] == '.html': + output.write(' {0}\n'.format(split[0])) + output.write(' Older Versions <versions>\n') + + print("SUCCESS: " + filename) + +for filename in glob.iglob('./docs/**/*.html', recursive=True): + if filename == './docs/contents.html': + convert_toc(filename) + else: + convert_article(filename) diff --git a/update-docs.sh b/update-docs.sh new file mode 100755 index 00000000000..7b16cecdb35 --- /dev/null +++ b/update-docs.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +python3 ./convert-sphinx.py && +git add "docs/**.rst" && git rm "docs/**.html" && +git commit -am "docs: convert articles to reructuredtext + +This uses the previously added scripts to convert the documentation to +reStructuredText, which is both easier to read offline, and can be used +to generate modern HTML for online documentation. + +No modification to the generated results have been done. + +Acked-by: Eric Engestrom <[email protected]>" |