Files @ 3c503044e9f1
Branch filter:

Location: kallithea/scripts/logformat.py

mads
mysql: bump sqlalchemy.url MariaDB/MySQL charset to to 'utf8mb4' to get full UTF-8 support

The change in 210e76d69b62 only changed character_set_database, as shown by output after:

--- a/kallithea/model/base.py
+++ b/kallithea/model/base.py
@@ -46,3 +46,8 @@ def init_model(engine):
engine_str = obfuscate_url_pw(str(engine.url))
log.info("initializing db for %s", engine_str)
meta.Base.metadata.bind = engine
+
+ meta.Session.configure(bind=engine)
+ for a, b in meta.Session().execute('''show variables''').fetchall():
+ if 'character_set_' in a:
+ print(a, repr(b))

Before, with charset=utf8, the utf8mb3 charset was used all the way through the stack:

[kallithea.model.base] initializing db for mysql://kallithea-test:XXXXX@localhost/kallithea-test?charset=utf8
character_set_client 'utf8'
character_set_connection 'utf8'
character_set_database 'utf8mb4'
character_set_filesystem 'binary'
character_set_results 'utf8'
character_set_server 'latin1'
character_set_system 'utf8'

With explicit charset=utf8mb4:

[kallithea.model.base] initializing db for mysql://kallithea-test:XXXXX@localhost/kallithea-test?charset=utf8mb4
character_set_client 'utf8mb4'
character_set_connection 'utf8mb4'
character_set_database 'utf8mb4'
character_set_filesystem 'binary'
character_set_results 'utf8mb4'
character_set_server 'latin1'
character_set_system 'utf8'
#!/usr/bin/env python3

import re
import sys


logre = r'''
(log\.(?:error|info|warning|debug)
[(][ \n]*
)
%s
(
[ \n]*[)]
)
'''


res = [
    # handle % () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*) \( ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* ) \) ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # handle % without () - keeping spaces around the old %
    (re.compile(logre % r'''("[^"]*"|'[^']*')   ([\n ]*) %  ([\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on next line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , (\n [ ]) ([ ][\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove extra space if it is on same line
    (re.compile(logre % r'''("[^"]*"|'[^']*') , [ ]+  () (   [\n ]+)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* )    ''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    # remove trailing , and space
    (re.compile(logre % r'''("[^"]*"|'[^']*') ,       () (   [\n ]*)    ( (?:[^()]|\n)* (?: \( (?:[^()]|\n)* \) (?:[^()]|\n)* )* [^(), \n] ) [ ,]*''', flags=re.MULTILINE | re.VERBOSE), r'\1\2,\3\4\5\6'),
    ]


def rewrite(f):
    s = open(f).read()
    for r, t in res:
        s = r.sub(t, s)
    open(f, 'w').write(s)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Cleanup of superfluous % formatting of log statements.')
        print('Usage:')
        print('''  hg revert `hg loc '*.py'|grep -v logformat.py` && scripts/logformat.py `hg loc '*.py'` && hg diff''')
        raise SystemExit(1)

    for f in sys.argv[1:]:
        rewrite(f)