Files
@ f0fbb0fe4462
Branch filter:
Location: kallithea/scripts/docs-headings.py - annotation
f0fbb0fe4462
2.6 KiB
text/x-python
git: update check for invalid URL characters to work with Python versions that include an attempt at fixing the very same problem
With changes like
https://github.com/python/cpython/commit/76cd81d60310d65d01f9d7b48a8985d8ab89c8b4
making it to Python 3.10 and being backported to previous Python versions, the
approach in a8a51a3bdb61 no longer works when combined with
urllib.parse.urlparse in d2f59de17bef: path will never contain the invalid
characters.
To catch this case anyway, add a new check to verify that the parsed URL can
roundtrip back to the original representation with urllib.parse.urlunparse.
The actual exception might vary, but one of them should always fire.
There is a risk that the new check will reject some URLs that somehow aren't
normalized. No such cases have been found yet.
With changes like
https://github.com/python/cpython/commit/76cd81d60310d65d01f9d7b48a8985d8ab89c8b4
making it to Python 3.10 and being backported to previous Python versions, the
approach in a8a51a3bdb61 no longer works when combined with
urllib.parse.urlparse in d2f59de17bef: path will never contain the invalid
characters.
To catch this case anyway, add a new check to verify that the parsed URL can
roundtrip back to the original representation with urllib.parse.urlunparse.
The actual exception might vary, but one of them should always fire.
There is a risk that the new check will reject some URLs that somehow aren't
normalized. No such cases have been found yet.
aa6f17a53b49 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 0a277465fddf f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 0a84ef075575 a188803df37e 01aca0a4f876 a8e6bb9ee9ea 665dfa112f2c f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 665dfa112f2c a188803df37e a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 | #!/usr/bin/env python3
"""
Consistent formatting of rst section titles
"""
import re
import subprocess
# (before, after) counts of extra newlines emitted around a header, indexed
# by the header's nesting level (position in the order styles first appear).
spaces = [
    (0, 1),  # we assume this is an over-and-underlined header
    (2, 1),
    (1, 1),
    (1, 0),
    (1, 0),
]

# http://sphinx-doc.org/rest.html :
#   for the Python documentation, this convention is used which you may follow:
#   # with overline, for parts
#   * with overline, for chapters
#   =, for sections
#   -, for subsections
#   ^, for subsubsections
#   ", for paragraphs
pystyles = list('#*=-^"')

# match on a header line underlined with one of the valid characters:
# group 1 is the title text, group 2 the underline character (repeated at
# least three times); surrounding newlines are consumed by the match
headermatch = re.compile(
    r'''\n*(.+)\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n+''',
    flags=re.MULTILINE,
)
def _reformat_headings(s):
    """Return the rst text ``s`` with consistent section title markup and spacing.

    Underline characters are assigned a level in order of first appearance and
    are rewritten to the corresponding entry of ``pystyles``, starting at the
    style used by the first header found. The newlines around each header are
    taken from ``spaces``. Suspicious jumps in header nesting are reported on
    stdout.
    """
    # find levels and their styles
    lastpos = 0
    styles = []
    for _title, style in headermatch.findall(s):
        if style in styles:
            stylepos = styles.index(style)
            if stylepos > lastpos + 1:
                print('bad style %r with level %s - was at %s' % (style, stylepos, lastpos))
        else:
            stylepos = len(styles)
            if stylepos > lastpos + 1:
                print('bad new style %r - expected %r' % (style, styles[lastpos + 1]))
            else:
                styles.append(style)
        lastpos = stylepos

    # remove superfluous spacing (may however be restored by header spacing)
    s = re.sub(r'''(\n\n)\n*''', r'\1', s, flags=re.MULTILINE)

    if styles:
        newstyles = pystyles[pystyles.index(styles[0]):]

        def subf(m):
            title, style = m.groups()
            if style not in styles:
                # This underline was reported as a 'bad new style' above and
                # never got a level; keep the header as it is instead of
                # failing in styles.index() and aborting the whole run.
                return m.group(0)
            level = styles.index(style)
            before, after = spaces[level]
            newstyle = newstyles[level]
            return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)

        s = headermatch.sub(subf, s)

    # remove superfluous spacing when headers are adjacent
    s = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', s, flags=re.MULTILINE)
    # fix trailing space and spacing before link sections
    s = s.strip() + '\n'
    s = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', s)
    return s


def main():
    """Rewrite the section titles of the files listed by ``hg files`` in place.

    The file list comes from ``hg files`` with the fileset
    ``set:**.rst+kallithea/i18n/how_to``. Each file is read, normalized and
    written back, and finally the output of ``hg diff`` for those files is
    printed.

    Raises subprocess.CalledProcessError if an ``hg`` invocation fails.
    """
    filenames = subprocess.check_output(['hg', 'files', 'set:**.rst+kallithea/i18n/how_to']).splitlines()
    for fn in filenames:
        fn = fn.decode()
        print('processing %s' % fn)
        # Explicit encoding so the result does not depend on the locale, and
        # context managers so the handles are closed deterministically.
        with open(fn, encoding='utf-8') as f:
            s = f.read()

        s = _reformat_headings(s)

        with open(fn, 'w', encoding='utf-8') as f:
            f.write(s)

    # check_output returns bytes; decode so the diff is shown as text rather
    # than as a single-line b'...' repr.
    diff = subprocess.check_output(['hg', 'diff'] + filenames)
    print(diff.decode(errors='replace'), end='')
# Run the reformatting only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|