Changeset - 30e137b4ff18
[Not reviewed]
default
0 1 0
Mads Kiilerich (mads) - 6 years ago 2019-12-19 20:47:55
mads@kiilerich.com
Grafted from: af9b446f297c
scripts/i18n: also normalize casing of UTF-8 in Content-Type

f626260a376c introduced invariant msgmerge casing. Do the same when normalizing,
to ensure consistency even when msgmerge is not used and to avoid unnecessary conflicts.
1 file changed with 3 insertions and 0 deletions:
0 comments (0 inline, 0 general)
scripts/i18n_utils.py
Show inline comments
 
@@ -112,48 +112,51 @@ def _normalize_po(raw_content):
 
    <BLANKLINE>
 
    msgid "None"
 
    msgstr "Ingen"
 
    <BLANKLINE>
 
    line 2
 
    <BLANKLINE>
 
    msgid "Specialist"
 
    msgstr ""
 
    "Expert"
 
    <BLANKLINE>
 
    msgid "%d minute"
 
    msgid_plural "%d minutes"
 
    msgstr[0] "minut"
 
    msgstr[1] "minutter"
 
    msgstr[2] ""
 
    ^^^
 
    """
 
    header_start = raw_content.find('\nmsgid ""\n') + 1
 
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
 
    chunks = [
 
        header_comment_strip_re.sub('', raw_content[0:header_start])
 
            .strip(),
 
        '',
 
        header_normalize_re.sub('', raw_content[header_start:header_end])
 
            .replace(
 
                r'"Content-Type: text/plain; charset=utf-8\n"',
 
                r'"Content-Type: text/plain; charset=UTF-8\n"')  # maintain msgmerge casing
 
            .strip(),
 
        '']  # preserve normalized header
 
    # all chunks are separated by empty line
 
    for raw_chunk in raw_content[header_end:].split('\n\n'):
 
        if '\n#, fuzzy' in raw_chunk:  # might be like "#, fuzzy, python-format"
 
            continue  # drop crazy auto translation that is worse than useless
 
        # strip all comment lines from chunk
 
        chunk_lines = [
 
            line
 
            for line in raw_chunk.splitlines()
 
            if line
 
            and not line.startswith('#')
 
        ]
 
        if not chunk_lines:
 
            continue
 
        # check lines starting from first msgstr, skip chunk if no translation lines
 
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
 
        if (
 
            chunk_lines[0].startswith('msgid') and
 
            msgstr_i and
 
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
 
        ):  # skip translation chunks that don't have any actual translations
 
            continue
 
        chunks.append('\n'.join(chunk_lines) + '\n')
0 comments (0 inline, 0 general)