Files
@ aa51aca7fd1a
Branch filter:
Location: kallithea/scripts/logformat.py - annotation
aa51aca7fd1a
1.8 KiB
text/x-python
controller: Handle UnicodeDecodeError from webob decoding invalid URLs
webob will try to utf-8 decode all %-encoded bytes in URL-parameters, but will
not handle Unicode erors ... and neither did Kallithea. Visiting a URL like
http://localhost:5000/?%AD would thus give an unhandled exception showing
"Internal Server Error" to the user, and logging the full traceback and:
WebApp Error: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xad in position 0: invalid start byte
This has been seen a lot recently from attackers probing for a php
vulnerability
https://devco.re/blog/2024/06/06/security-alert-cve-2024-4577-php-cgi-argument-injection-vulnerability-en/ .
Now handle these exceptions more nicely and reject with "400 Bad Request".
webob will try to utf-8 decode all %-encoded bytes in URL-parameters, but will
not handle Unicode erors ... and neither did Kallithea. Visiting a URL like
http://localhost:5000/?%AD would thus give an unhandled exception showing
"Internal Server Error" to the user, and logging the full traceback and:
WebApp Error: UnicodeDecodeError: 'utf-8' codec can't decode byte 0xad in position 0: invalid start byte
This has been seen a lot recently from attackers probing for a php
vulnerability
https://devco.re/blog/2024/06/06/security-alert-cve-2024-4577-php-cgi-argument-injection-vulnerability-en/ .
Now handle these exceptions more nicely and reject with "400 Bad Request".
aa6f17a53b49 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 0a277465fddf 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 8bc8366a6874 4473f1094d3d 4473f1094d3d 8bc8366a6874 8bc8366a6874 63b548dd5ef3 8bc8366a6874 63b548dd5ef3 8bc8366a6874 63b548dd5ef3 8bc8366a6874 63b548dd5ef3 8bc8366a6874 63b548dd5ef3 8bc8366a6874 8bc8366a6874 4473f1094d3d 4473f1094d3d 665dfa112f2c 8bc8366a6874 8bc8366a6874 665dfa112f2c 4473f1094d3d 4473f1094d3d 4473f1094d3d 4473f1094d3d a8e6bb9ee9ea a8e6bb9ee9ea 0a84ef075575 4473f1094d3d 4473f1094d3d 4473f1094d3d 4473f1094d3d | #!/usr/bin/env python3
import re
import sys
logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''
res = [
# handle % () - keeping spaces around the old %
(re.compile(logre % r'''("[^"]*"|'[^']*') ([\n ]*) % ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
# handle % without () - keeping spaces around the old %
(re.compile(logre % r'''("[^"]*"|'[^']*') ([\n ]*) % ([\n ]*) ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
# remove extra space if it is on next line
(re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*) ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
# remove extra space if it is on same line
(re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+ () ( [\n ]+) ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
# remove trailing , and space
(re.compile(logre % r'''("[^"]*"|'[^']*') , () ( [\n ]*) ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
]
def rewrite(f):
s = open(f).read()
for r, t in res:
s = r.sub(t, s)
open(f, 'w').write(s)
if __name__ == '__main__':
if len(sys.argv) < 2:
print('Cleanup of superfluous % formatting of log statements.')
print('Usage:')
print(''' hg revert `hg files 'set:**.py'|grep -v logformat.py` && scripts/logformat.py `hg files 'set:**.py'` && hg diff''')
raise SystemExit(1)
for f in sys.argv[1:]:
rewrite(f)
|