Changeset - 19735bc60455
[Not reviewed]
default
0 1 0
Mads Kiilerich (mads) - 6 years ago 2019-11-13 12:28:19
mads@kiilerich.com
Grafted from: 52eafdb4531d
i18n: also strip '# #, fuzzy' from header comment - it might appear when verifying branches are in sync
1 file changed with 3 insertions and 0 deletions:
0 comments (0 inline, 0 general)
scripts/i18n_utils.py
Show inline comments
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
from __future__ import print_function
 

	
 
import os
 
import re
 
import shutil
 
import subprocess
 
import tempfile
 

	
 

	
 
# Module-wide switch read by debug(): while it is false, debug() prints nothing.
do_debug = False  # set from scripts/i18n --debug
 

	
 
def debug(*args, **kwargs):
    """Forward all arguments to print(), but only while do_debug is true.

    Positional and keyword arguments are passed to print() unchanged, so
    options such as ``sep``, ``end`` and ``file`` work as usual. Nothing is
    printed when the module-level ``do_debug`` flag is false.
    """
    if not do_debug:
        return
    print(*args, **kwargs)
 

	
 
def runcmd(cmd, *args, **kwargs):
    """Run an external command, announcing it first via debug().

    ``cmd`` is the argument sequence handed to subprocess.check_call; any
    further positional and keyword arguments are forwarded to check_call
    unchanged. As with check_call, a non-zero exit status raises
    subprocess.CalledProcessError.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
 

	
 
# Matches boilerplate lines in the comment block that precedes the PO header
# entry; _normalize_po() removes every match with .sub('', ...).
# The alternatives are, in order:
#   1. the "# Translations template for Kallithea." line
#   2. the "# FIRST AUTHOR <EMAIL@ADDRESS>, <digits>." line, plus an optional
#      bare "#" line right after it
#   3. a "#, fuzzy" line, plus an optional bare "#" line right before it
#   4. a "# #, fuzzy" line
# The pattern is compiled with re.VERBOSE, where unescaped whitespace is
# ignored and "#" starts a comment, so literal spaces and hashes are written
# as the character classes [ ] and [#]. re.MULTILINE lets each "^" anchor at
# the start of any line, not only at the start of the string.
header_comment_strip_re = re.compile(r'''
 
    ^
 
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
 
    |
 
    ^
 
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
 
    (?:[#] \n)?
 
    |
 
    ^
 
    (?:[#] \n)?
 
    [#],[ ]fuzzy \n
 
    |
 
    ^
 
    [#][ ][#],[ ]fuzzy \n
 
    ''',
 
    re.MULTILINE|re.VERBOSE)
 

	
 
# Matches one quoted header field entry inside the PO header msgstr, for the
# fields listed in the alternation below (field names compared
# case-insensitively because of re.IGNORECASE). _normalize_po() removes every
# match with .sub('', ...).
# A match starts at a line beginning with '"<Field>: ' and extends over any
# characters that are not a backslash, up to the first literal backslash-n
# escape followed by the closing quote and a newline. Because [^\\]* also
# accepts quotes and newlines, a value continued over several quoted lines is
# matched as a whole.
# re.VERBOSE ignores unescaped whitespace in the pattern, hence the [ ] class
# for the literal space after the colon.
header_normalize_re = re.compile(r'''
 
    ^ "
 
    (POT-Creation-Date|PO-Revision-Date|Last-Translator|Language-Team|X-Generator|Generated-By|Project-Id-Version):
 
    [ ][^\\]*\\n
 
    " \n
 
    ''',
 
    re.MULTILINE|re.IGNORECASE|re.VERBOSE)
 

	
 
def _normalize_po(raw_content):
 
    r"""
 
    >>> print(_normalize_po(r'''
 
    ... # header comment
 
    ...
 
    ...
 
    ... # comment before header
 
    ... msgid ""
 
    ... msgstr "yada"
 
    ... "POT-Creation-Date: 2019-05-04 21:13+0200\n"
 
    ... "MIME-Version: "
 
    ... "1.0\n"
 
    ... "Last-Translator: Jabba"
 
    ... "the Hutt\n"
 
    ... "X-Generator: Weblate 1.2.3\n"
 
    ...
 
    ... # comment, but not in header
 
    ... msgid "None"
 
    ... msgstr "Ingen"
 
    ...
 
    ...
 
    ... line 2
 
    ... # third comment
 
    ...
 
    ... msgid "Special"
 
    ... msgstr ""
 
    ...
 
    ... msgid "Specialist"
 
    ... # odd comment
 
    ... msgstr ""
 
    ... "Expert"
 
    ...
 
    ... # crazy fuzzy auto translation by msgmerge, using foo for bar
 
    ... #, fuzzy
 
    ... #| msgid "some foo string"
 
    ... msgid "some bar string."
 
    ... msgstr "translation of foo string"
 
    ...
 
    ... msgid "%d minute"
 
    ... msgid_plural "%d minutes"
 
    ... msgstr[0] "minut"
 
    ... msgstr[1] "minutter"
 
    ... msgstr[2] ""
 
    ...
 
    ... msgid "%d year"
 
    ... msgid_plural "%d years"
 
    ... msgstr[0] ""
 
    ... msgstr[1] ""
 
    ...
 
    ... # last comment
 
    ... ''') + '^^^')
 
    # header comment
 
    <BLANKLINE>
 
    <BLANKLINE>
 
    # comment before header
 
    <BLANKLINE>
 
    msgid ""
 
    msgstr "yada"
 
    "MIME-Version: "
 
    "1.0\n"
 
    <BLANKLINE>
 
    msgid "None"
 
    msgstr "Ingen"
 
    <BLANKLINE>
 
    line 2
 
    <BLANKLINE>
 
    msgid "Specialist"
 
    msgstr ""
 
    "Expert"
 
    <BLANKLINE>
 
    msgid "%d minute"
 
    msgid_plural "%d minutes"
 
    msgstr[0] "minut"
 
    msgstr[1] "minutter"
 
    msgstr[2] ""
 
    ^^^
 
    """
 
    header_start = raw_content.find('\nmsgid ""\n') + 1
 
    header_end = raw_content.find('\n\n', header_start) + 1 or len(raw_content)
 
    chunks = [
 
        header_comment_strip_re.sub('', raw_content[0:header_start])
 
            .strip(),
 
        '',
 
        header_normalize_re.sub('', raw_content[header_start:header_end])
 
            .replace(
0 comments (0 inline, 0 general)