Changeset - 93dabafa567e
[Not reviewed]
default
0 2 0
Thomas De Schampheleire - 6 years ago 2019-11-07 01:52:16
thomas.de_schampheleire@nokia.com
Grafted from: 43667e4c14dd
scripts/i18n: add command 'normalized-diff'

Add a command 'normalized-diff' that takes two (po) files and diffs normalized
copies of them.
2 files changed with 26 insertions and 0 deletions:
0 comments (0 inline, 0 general)
scripts/i18n
Show inline comments
 
#!/usr/bin/env python3
 

	
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
import sys
 

	
 
import click
 

	
 
import i18n_utils
 

	
 

	
 
"""
 
Tool for maintenance of .po and .pot files
 

	
 
Normally, the i18n-related files contain for each translatable string a
 
reference to all the source code locations where this string is found. This
 
meta data is useful for translators to assess how strings are used, but is not
 
relevant for normal development nor for running Kallithea. Such meta data, or
 
derived data like kallithea.pot, will inherently be outdated, and create
 
unnecessary churn and repository growth, making it harder to spot actual and
 
important changes.
 
"""
 

	
 
@click.group()
@click.option('--debug/--no-debug', default=False)
def cli(debug):
    # Root command group. The flag is only ever switched on here; passing
    # --no-debug (the default) leaves i18n_utils.do_debug untouched.
    # NOTE: deliberately a comment and not a docstring, since click would
    # show a docstring as the group's help text.
    if debug:
        i18n_utils.do_debug = True
 

	
 
@cli.command()
@click.argument('po_files', nargs=-1)
def normalize_po_files(po_files):
    """Normalize the specified .po and .pot files.

    Only actual translations and essential headers will be preserved.
    """
    # Each file named on the command line is rewritten in place, in order.
    for path in po_files:
        i18n_utils._normalize_po_file(path, strip=True)
 

	
 
@cli.command()
@click.argument('file1')
@click.argument('file2')
def normalized_diff(file1, file2):
    """Compare two files while transparently normalizing them."""
    # The helper's return value becomes the exit status of the process.
    exit_status = i18n_utils._normalized_diff(file1, file2, strip=True)
    sys.exit(exit_status)
 

	
 
# Dispatch to the click command group only when this file is executed as a
# script; importing it as a module does not run any command.
if __name__ == '__main__':
 
    cli()
scripts/i18n_utils.py
Show inline comments
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 

	
 
from __future__ import print_function
 

	
 
import os
 
import re
 
import shutil
 
import subprocess
 
import tempfile
 

	
 

	
 
do_debug = False  # set from scripts/i18n --debug


def debug(*args, **kwargs):
    """Print a diagnostic message, but only while ``do_debug`` is true.

    All positional and keyword arguments are forwarded unchanged to
    ``print``; when debugging is off the call does nothing.
    """
    if not do_debug:
        return
    print(*args, **kwargs)
 

	
 
def runcmd(cmd, *args, **kwargs):
    """Log ``cmd`` through ``debug`` and run it with ``subprocess.check_call``.

    ``cmd`` is a sequence of strings (it is joined with spaces for the log
    line); any further arguments are handed to ``check_call`` as-is.
    ``subprocess.CalledProcessError`` propagates when the command exits with
    a non-zero status. Nothing is returned.
    """
    command_line = ' '.join(cmd)
    debug('... Executing command: %s' % command_line)
    subprocess.check_call(cmd, *args, **kwargs)
 

	
 
# Matches, anchored at the start of a line (re.MULTILINE), either
#   "# Translations template for Kallithea.\n"
# or the three-line placeholder block
#   "# FIRST AUTHOR <EMAIL@ADDRESS>, <digits>.\n#\n#, fuzzy\n".
# The pattern is compiled with re.VERBOSE, where unescaped whitespace is
# ignored and a bare '#' starts a comment; that is why literal spaces are
# written as [ ] and literal hashes as [#].
# NOTE(review): judging by the name this is used to strip boilerplate header
# comments from .po/.pot content - the call site is not visible here; confirm.
header_comment_strip_re = re.compile(r'''
 
    ^
 
    [#][ ]Translations[ ]template[ ]for[ ]Kallithea[.] \n
 
    |
 
    ^
 
    [#][ ]FIRST[ ]AUTHOR[ ]<EMAIL@ADDRESS>,[ ]\d+[.] \n
 
    [#] \n
 
    [#],[ ]fuzzy \n
 
    ''',
 
    re.MULTILINE|re.VERBOSE)
 

	
 
header_normalize_re = re.compile(r'''
 
@@ -144,24 +146,40 @@ def _normalize_po(raw_content):
 
            if line
 
            and not line.startswith('#')
 
        ]
 
        if not chunk_lines:
 
            continue
 
        # check lines starting from first msgstr, skip chunk if no translation lines
 
        msgstr_i = [i for i, line in enumerate(chunk_lines) if line.startswith('msgstr')]
 
        if (
 
            chunk_lines[0].startswith('msgid') and
 
            msgstr_i and
 
            all(line.endswith(' ""') for line in chunk_lines[msgstr_i[0]:])
 
        ):  # skip translation chunks that doesn't have any actual translations
 
            continue
 
        chunks.append('\n'.join(chunk_lines) + '\n')
 
    return '\n'.join(chunks)
 

	
 
def _normalize_po_file(po_file, strip=False):
    """Rewrite ``po_file`` in place with its normalized content.

    Nothing happens unless ``strip`` is true. When it is, the file is read,
    passed through ``_normalize_po``, written to ``po_file + '.tmp'`` and
    that temporary file is then renamed over the original.

    Any error raised while reading, normalizing, writing or renaming
    propagates to the caller; in that case no ``.tmp`` file is left behind
    and ``po_file`` itself is not modified.
    """
    if not strip:
        return

    # Read and normalize *before* the temporary file is created, so that a
    # failure in either step cannot leave an empty '.tmp' next to the source.
    with open(po_file, 'r') as src:
        normalized_content = _normalize_po(src.read())

    po_tmp = po_file + '.tmp'
    try:
        with open(po_tmp, 'w') as dest:
            dest.write(normalized_content)
        os.rename(po_tmp, po_file)
    except BaseException:
        # Writing or renaming failed: drop the partial temporary file and
        # let the original error reach the caller.
        if os.path.exists(po_tmp):
            os.remove(po_tmp)
        raise
 

	
 
def _normalized_diff(file1, file2, strip=False):
    """Run ``diff -u`` on normalized temporary copies of two files.

    Both inputs are copied to temporary files, each copy is passed to
    ``_normalize_po_file`` with the given ``strip`` value, and the copies
    are compared with ``diff -u``. The input files themselves are never
    modified, and the temporary copies are removed before returning, also
    when an error occurs.

    Returns 0 when ``diff`` exits successfully, otherwise the non-zero exit
    status reported by ``diff``. Errors other than a non-zero exit status
    (for example an unreadable input file) propagate to the caller.
    """
    # The context managers close, and thereby delete, the temporary files
    # deterministically instead of leaving cleanup to garbage collection.
    with tempfile.NamedTemporaryFile(prefix=os.path.basename(file1)) as temp1, \
            tempfile.NamedTemporaryFile(prefix=os.path.basename(file2)) as temp2:
        debug('normalized_diff: %s -> %s / %s -> %s' % (file1, temp1.name, file2, temp2.name))
        # Create temporary copies of both files
        shutil.copyfile(file1, temp1.name)
        shutil.copyfile(file2, temp2.name)
        # Normalize them in place
        _normalize_po_file(temp1.name, strip=strip)
        _normalize_po_file(temp2.name, strip=strip)
        # Now compare
        try:
            runcmd(['diff', '-u', temp1.name, temp2.name])
        except subprocess.CalledProcessError as e:
            return e.returncode
        # Explicit success status, so callers always receive an int.
        return 0
0 comments (0 inline, 0 general)