Files @ ad239692ea95
Branch filter:

Location: kallithea/scripts/logformat.py - annotation

mads
mail: fix duplicate "From" headers

Problem introduced in 9a0c41175e66: When iterating the headers dict and setting
"msg[key] = value", it wasn't replacing the header but performing add_header so
we sometimes ended up with two From headers.

It is also a general problem that while the headers dict only can contain each
key once, it can contain entries that only differ in casing and thus will fold
to the same message header, making it possible to end up adding duplicate
headers.

"msg.replace_header(key, value)" is not a simple solution to the problem: it
will raise KeyError if no such previous key exists.

Now, make the problem more clear by explicitly using add_header.

Avoid the duplication problem by deleting the key (no matter which casing)
before invoking add_header. Delete promises that "No exception is raised if the
named field isn’t present in the headers".
#!/usr/bin/env python3

import re
import sys


logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''


res = [
    # handle % () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # handle % without () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on next line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on same line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+  () (   [\n ]+)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove trailing , and space
    (re.compile(logre % r'''("[^"]*"|'[^']*') ,       () (   [\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    ]


def rewrite(f):
    s = open(f).read()
    for r, t in res:
        s = r.sub(t, s)
    open(f, 'w').write(s)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Cleanup of superfluous % formatting of log statements.')
        print('Usage:')
        print('''  hg revert `hg loc '*.py'|grep -v logformat.py` && scripts/logformat.py `hg loc '*.py'` && hg diff''')
        raise SystemExit(1)

    for f in sys.argv[1:]:
        rewrite(f)