Files
@ d35d14b05b82
Branch filter:
Location: kallithea/scripts/docs-headings.py - annotation
d35d14b05b82
2.6 KiB
text/x-python
diff: handle some escaped characters in Git diffs
There are some odd characters (like \r and \n) that the Kallithea UI doesn't
allow in filenames in repos. Kallithea (through the routes module) will fail to
generate URLs when browsing Files. That is a known limitation with minimal
real-world impact, non-trivial to work around or fix.
There are very few relevant use cases for tracking files with odd filenames. \t
is valid but is hard to render in a meaningful way in the UI. And ASCII
characters like \ and " are not usable on Windows and should just be avoided.
Kallithea would parse Git diffs with odd characters incorrectly or fail, even
before hitting the known limitation. With this change, Kallithea will parse
diffs with odd filenames correctly (and then hit the limitation).
Git will quote odd filenames and escape the odd characters when emitting diffs.
(Mercurial does by design not allow \r and \n, and Mercurial will thus never
have to quote file names in diffs.)
Quotes are already handled (and ignored). With this change, Kallithea will
handle \ unescaping of \\ and \", the usual letters like \r and \n and \t, and
octal numbers like \033 (for ESC).
Filenames with \ and " will work perfectly (when not on Windows).
Filenames with \t and ESC will work fine, but without helpful display in the
UI.
Filenames with \r and \n will still make the UI fail when trying to generate
URLs.
Thanks to stypr of Flatt Security for raising this.
There are some odd characters (like \r and \n) that the Kallithea UI doesn't
allow in filenames in repos. Kallithea (through the routes module) will fail to
generate URLs when browsing Files. That is a known limitation with minimal
real-world impact, non-trivial to work around or fix.
There are very few relevant use cases for tracking files with odd filenames. \t
is valid but is hard to render in a meaningful way in the UI. And ASCII
characters like \ and " are not usable on Windows and should just be avoided.
Kallithea would parse Git diffs with odd characters incorrectly or fail, even
before hitting the known limitation. With this change, Kallithea will parse
diffs with odd filenames correctly (and then hit the limitation).
Git will quote odd filenames and escape the odd characters when emitting diffs.
(Mercurial does by design not allow \r and \n, and Mercurial will thus never
have to quote file names in diffs.)
Quotes are already handled (and ignored). With this change, Kallithea will
handle \ unescaping of \\ and \", the usual letters like \r and \n and \t, and
octal numbers like \033 (for ESC).
Filenames with \ and " will work perfectly (when not on Windows).
Filenames with \t and ESC will work fine, but without helpful display in the
UI.
Filenames with \r and \n will still make the UI fail when trying to generate
URLs.
Thanks to stypr of Flatt Security for raising this.
aa6f17a53b49 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 0a277465fddf f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 a188803df37e a188803df37e 01aca0a4f876 a8e6bb9ee9ea 665dfa112f2c f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 ed2fb6e84a02 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 665dfa112f2c a188803df37e a8e6bb9ee9ea f38b50f8a6a6 f38b50f8a6a6 f38b50f8a6a6 | #!/usr/bin/env python3
"""
Consistent formatting of rst section titles
"""
import re
import subprocess
# Extra blank lines to emit (before, after) a header, indexed by header level.
# main() adds one newline to each count when it rebuilds a header.
spaces = [
    (0, 1),  # level 0: assumed to be an over-and-underlined header
    (2, 1),  # level 1
    (1, 1),  # level 2
    (1, 0),  # level 3
    (1, 0),  # level 4
]
# Underline characters in the order recommended by
# http://sphinx-doc.org/rest.html (the convention used for the Python
# documentation):
#   #  with overline, for parts
#   *  with overline, for chapters
#   =  for sections
#   -  for subsections
#   ^  for subsubsections
#   "  for paragraphs
pystyles = list('#*=-^"')
# Matches a title line followed by an underline made of at least three
# repetitions of one valid punctuation character.
#   group 1: the title text
#   group 2: the underline character
# Newlines directly before the title and after the underline are part of the
# match, so a substitution can normalize the spacing around the header.
headermatch = re.compile(
    r'''\n*(.+)\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n+''',
    re.MULTILINE,
)
def _detect_styles(text):
    """Return the underline characters used in ``text``, ordered by level.

    The first underline character seen becomes level 0, the next new one
    level 1, and so on. A header that jumps more than one level deeper than
    the previous header is reported on stdout; a *new* style that makes such a
    jump is reported and not registered.
    """
    lastpos = 0
    styles = []
    for _title, style in headermatch.findall(text):
        if style in styles:
            stylepos = styles.index(style)
            if stylepos > lastpos + 1:
                print('bad style %r with level %s - was at %s' % (style, stylepos, lastpos))
        else:
            stylepos = len(styles)
            if stylepos > lastpos + 1:
                print('bad new style %r - expected %r' % (style, styles[lastpos + 1]))
            else:
                styles.append(style)
        lastpos = stylepos
    return styles


def _reformat(text):
    """Return ``text`` with normalized header underlines and blank lines."""
    styles = _detect_styles(text)

    # remove superfluous spacing (may however be restored by header spacing)
    text = re.sub(r'''(\n\n)\n*''', r'\1', text, flags=re.MULTILINE)

    if styles:
        # Map the file's own level order onto the conventional characters,
        # starting from whichever character the file uses for its top level.
        newstyles = pystyles[pystyles.index(styles[0]):]

        def subf(m):
            title, style = m.groups()
            if style not in styles:
                # This header was already reported as "bad new style" and has
                # no known level; leave it untouched instead of crashing in
                # styles.index() halfway through the run.
                return m.group(0)
            level = styles.index(style)
            before, after = spaces[level]
            newstyle = newstyles[level]
            return '\n' * (before + 1) + title + '\n' + newstyle * len(title) + '\n' * (after + 1)

        text = headermatch.sub(subf, text)

    # remove superfluous spacing when headers are adjacent
    text = re.sub(r'''(\n.+\n([][!"#$%&'()*+,./:;<=>?@\\^_`{|}~-])\2{2,}\n\n\n)\n*''', r'\1', text, flags=re.MULTILINE)
    # fix trailing space and spacing before link sections
    text = text.strip() + '\n'
    text = re.sub(r'''\n+((?:\.\. _[^\n]*\n)+)$''', r'\n\n\n\1', text)
    return text


def main():
    """Rewrite the section headers of the listed files in place.

    The files are those reported by ``hg loc`` for the fileset
    ``**.rst+kallithea/i18n/how_to``. Each one is read, reformatted and
    written back, and finally the output of ``hg diff`` for those files is
    printed.
    """
    filenames = subprocess.check_output(['hg', 'loc', 'set:**.rst+kallithea/i18n/how_to']).splitlines()
    for fn in filenames:
        fn = fn.decode()
        print('processing %s' % fn)
        # Context managers make sure the handle is closed (and the new
        # content flushed) before hg inspects the file.
        with open(fn) as f:
            s = f.read()
        s = _reformat(s)
        with open(fn, 'w') as f:
            f.write(s)

    # check_output returns bytes; decode so the diff is printed as text
    # rather than as a b'...' repr.
    print(subprocess.check_output(['hg', 'diff'] + filenames).decode(errors='replace'))
# Run the reformatter only when this file is executed as a script.
if __name__ == '__main__':
    main()
|