Changeset - 32757d5e9d0b
[Not reviewed]
default
0 2 0
Mads Kiilerich (mads) - 6 years ago 2020-06-12 00:36:04
mads@kiilerich.com
Grafted from: 5f05b4d7dbd5
diff: drop internal inconsistent use of '...' as context line numbers

as_html() did in some cases not check for '...' and would thus emit unnecessary
html.
2 files changed with 8 insertions and 11 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -14,198 +14,195 @@
 
"""
 
kallithea.lib.diffs
 
~~~~~~~~~~~~~~~~~~~
 

	
 
Set of diffing helpers, previously part of vcs
 

	
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Dec 4, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 
import difflib
 
import logging
 
import re
 

	
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib import helpers as h
 
from kallithea.lib.utils2 import safe_str
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.exceptions import VCSError
 
from kallithea.lib.vcs.nodes import FileNode, SubModuleNode
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def _safe_id(idstring):
 
    r"""Make a string safe for including in an id attribute.
 

	
 
    The HTML spec says that id attributes 'must begin with
 
    a letter ([A-Za-z]) and may be followed by any number
 
    of letters, digits ([0-9]), hyphens ("-"), underscores
 
    ("_"), colons (":"), and periods (".")'. These regexps
 
    are slightly over-zealous, in that they remove colons
 
    and periods unnecessarily.
 

	
 
    Whitespace is transformed into underscores, and then
 
    anything which is not a hyphen or a character that
 
    matches \w (alphanumerics and underscore) is removed.
 

	
 
    """
 
    # Transform all whitespace to underscore
 
    idstring = re.sub(r'\s', "_", idstring)
 
    # Remove everything that is not a hyphen or a member of \w
 
    idstring = re.sub(r'(?!-)\W', "", idstring).lower()
 
    return idstring
 

	
 

	
 
def as_html(table_class='code-difftable', line_class='line',
 
            old_lineno_class='lineno old', new_lineno_class='lineno new',
 
            no_lineno_class='lineno',
 
            code_class='code', enable_comments=False, parsed_lines=None):
 
    """
 
    Return given diff as html table with customized css classes
 
    """
 
    def _link_to_if(condition, label, url):
 
        """
 
        Generates a link if condition is meet or just the label if not.
 
        """
 

	
 
        if condition:
 
            return '''<a href="%(url)s" data-pseudo-content="%(label)s"></a>''' % {
 
                'url': url,
 
                'label': label
 
            }
 
        else:
 
            return label
 

	
 
    _html_empty = True
 
    _html = []
 
    _html.append('''<table class="%(table_class)s">\n''' % {
 
        'table_class': table_class
 
    })
 

	
 
    for file_info in parsed_lines:
 
        for chunk in file_info['chunks']:
 
            _html_empty = False
 
            for change in chunk:
 
                _html.append('''<tr class="%(lc)s %(action)s">\n''' % {
 
                    'lc': line_class,
 
                    'action': change['action']
 
                })
 
                anchor_old_id = ''
 
                anchor_new_id = ''
 
                anchor_old = "%(filename)s_o%(oldline_no)s" % {
 
                    'filename': _safe_id(file_info['filename']),
 
                    'oldline_no': change['old_lineno']
 
                }
 
                anchor_new = "%(filename)s_n%(newline_no)s" % {
 
                    'filename': _safe_id(file_info['filename']),
 
                    'newline_no': change['new_lineno']
 
                }
 
                cond_old = (change['old_lineno'] != '...' and
 
                            change['old_lineno'])
 
                cond_new = (change['new_lineno'] != '...' and
 
                            change['new_lineno'])
 
                no_lineno = (change['old_lineno'] == '...' and
 
                             change['new_lineno'] == '...')
 
                cond_old = change['old_lineno']
 
                cond_new = change['new_lineno']
 
                no_lineno = not change['old_lineno'] and not change['new_lineno']
 
                if cond_old:
 
                    anchor_old_id = 'id="%s"' % anchor_old
 
                if cond_new:
 
                    anchor_new_id = 'id="%s"' % anchor_new
 
                ###########################################################
 
                # OLD LINE NUMBER
 
                ###########################################################
 
                _html.append('''\t<td %(a_id)s class="%(olc)s" %(colspan)s>''' % {
 
                    'a_id': anchor_old_id,
 
                    'olc': no_lineno_class if no_lineno else old_lineno_class,
 
                    'colspan': 'colspan="2"' if no_lineno else ''
 
                })
 

	
 
                _html.append('''%(link)s''' % {
 
                    'link': _link_to_if(not no_lineno, change['old_lineno'],
 
                                        '#%s' % anchor_old)
 
                })
 
                _html.append('''</td>\n''')
 
                ###########################################################
 
                # NEW LINE NUMBER
 
                ###########################################################
 

	
 
                if not no_lineno:
 
                    _html.append('''\t<td %(a_id)s class="%(nlc)s">''' % {
 
                        'a_id': anchor_new_id,
 
                        'nlc': new_lineno_class
 
                    })
 

	
 
                    _html.append('''%(link)s''' % {
 
                        'link': _link_to_if(True, change['new_lineno'],
 
                                            '#%s' % anchor_new)
 
                    })
 
                    _html.append('''</td>\n''')
 
                ###########################################################
 
                # CODE
 
                ###########################################################
 
                comments = '' if enable_comments else 'no-comment'
 
                _html.append('''\t<td class="%(cc)s %(inc)s">''' % {
 
                    'cc': code_class,
 
                    'inc': comments
 
                })
 
                _html.append('''\n\t\t<div class="add-bubble"><div>&nbsp;</div></div><pre>%(code)s</pre>\n''' % {
 
                    'code': change['line']
 
                })
 

	
 
                _html.append('''\t</td>''')
 
                _html.append('''\n</tr>\n''')
 
    _html.append('''</table>''')
 
    if _html_empty:
 
        return None
 
    return ''.join(_html)
 

	
 

	
 
def wrap_to_table(html):
 
    """Given a string with html, return it wrapped in a table, similar to what
 
    DiffProcessor returns."""
 
    return '''\
 
              <table class="code-difftable">
 
                <tr class="line no-comment">
 
                <td class="lineno new"></td>
 
                <td class="code no-comment"><pre>%s</pre></td>
 
                </tr>
 
              </table>''' % html
 

	
 

	
 
def wrapped_diff(filenode_old, filenode_new, diff_limit=None,
 
                ignore_whitespace=True, line_context=3,
 
                enable_comments=False):
 
    """
 
    Returns a file diff wrapped into a table.
 
    Checks for diff_limit and presents a message if the diff is too big.
 
    """
 
    if filenode_old is None:
 
        filenode_old = FileNode(filenode_new.path, '', EmptyChangeset())
 

	
 
    op = None
 
    a_path = filenode_old.path # default, might be overriden by actual rename in diff
 
    if filenode_old.is_binary or filenode_new.is_binary:
 
        html_diff = wrap_to_table(_('Binary file'))
 
        stats = (0, 0)
 

	
 
    elif diff_limit != -1 and (
 
            diff_limit is None or
 
            (filenode_old.size < diff_limit and filenode_new.size < diff_limit)):
 

	
 
        raw_diff = get_gitdiff(filenode_old, filenode_new,
 
                                ignore_whitespace=ignore_whitespace,
 
                                context=line_context)
 
        diff_processor = DiffProcessor(raw_diff)
 
        if diff_processor.parsed: # there should be exactly one element, for the specified file
 
            f = diff_processor.parsed[0]
 
            op = f['operation']
 
            a_path = f['old_filename']
 

	
 
        html_diff = as_html(parsed_lines=diff_processor.parsed, enable_comments=enable_comments)
 
        stats = diff_processor.stat()
 
@@ -494,188 +491,188 @@ _git_header_re = re.compile(br"""
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_hg_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 

	
 

	
 
def _get_header(vcs, diff_chunk):
 
    """
 
    Parses a Git diff for a single file (header and chunks) and returns a tuple with:
 

	
 
    1. A dict with meta info:
 

	
 
        a_path, b_path, similarity_index, rename_from, rename_to,
 
        old_mode, new_mode, new_file_mode, deleted_file_mode,
 
        a_blob_id, b_blob_id, b_mode, a_file, b_file
 

	
 
    2. An iterator yielding lines with simple HTML markup.
 
    """
 
    match = None
 
    if vcs == 'git':
 
        match = _git_header_re.match(diff_chunk)
 
    elif vcs == 'hg':
 
        match = _hg_header_re.match(diff_chunk)
 
    if match is None:
 
        raise Exception('diff not recognized as valid %s diff' % vcs)
 
    meta_info = {k: None if v is None else safe_str(v) for k, v in match.groupdict().items()}
 
    rest = diff_chunk[match.end():]
 
    if rest and _header_next_check.match(rest):
 
        raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
 
    diff_lines = (_escaper(m.group(0)) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
 
    return meta_info, diff_lines
 

	
 

	
 
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
_newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
 

	
 
def _parse_lines(diff_lines):
 
    """
 
    Given an iterator of diff body lines, parse them and return a dict per
 
    line and added/removed totals.
 
    """
 
    added = deleted = 0
 
    old_line = old_end = new_line = new_end = None
 

	
 
    chunks = []
 
    try:
 
        line = next(diff_lines)
 

	
 
        while True:
 
            lines = []
 
            chunks.append(lines)
 

	
 
            match = _chunk_re.match(line)
 

	
 
            if not match:
 
                raise Exception('error parsing diff @@ line %r' % line)
 

	
 
            gr = match.groups()
 
            (old_line, old_end,
 
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
            old_line -= 1
 
            new_line -= 1
 

	
 
            context = len(gr) == 5
 
            old_end += old_line
 
            new_end += new_line
 

	
 
            if context:
 
                # skip context only if it's first line
 
                if int(gr[0]) > 1:
 
                    lines.append({
 
                        'old_lineno': '...',
 
                        'new_lineno': '...',
 
                        'old_lineno': '',
 
                        'new_lineno': '',
 
                        'action':     'context',
 
                        'line':       line,
 
                    })
 

	
 
            line = next(diff_lines)
 

	
 
            while old_line < old_end or new_line < new_end:
 
                if not line:
 
                    raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                affects_old = affects_new = False
 

	
 
                command = line[0]
 
                if command == '+':
 
                    affects_new = True
 
                    action = 'add'
 
                    added += 1
 
                elif command == '-':
 
                    affects_old = True
 
                    action = 'del'
 
                    deleted += 1
 
                elif command == ' ':
 
                    affects_old = affects_new = True
 
                    action = 'unmod'
 
                else:
 
                    raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                if not _newline_marker.match(line):
 
                    old_line += affects_old
 
                    new_line += affects_new
 
                    lines.append({
 
                        'old_lineno':   affects_old and old_line or '',
 
                        'new_lineno':   affects_new and new_line or '',
 
                        'action':       action,
 
                        'line':         line[1:],
 
                    })
 

	
 
                line = next(diff_lines)
 

	
 
                if _newline_marker.match(line):
 
                    # we need to append to lines, since this is not
 
                    # counted in the line specs of diff
 
                    lines.append({
 
                        'old_lineno':   '...',
 
                        'new_lineno':   '...',
 
                        'old_lineno':   '',
 
                        'new_lineno':   '',
 
                        'action':       'context',
 
                        'line':         line,
 
                    })
 
                    line = next(diff_lines)
 
            if old_line > old_end:
 
                raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
            if new_line > new_end:
 
                raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
    except StopIteration:
 
        pass
 
    if old_line != old_end or new_line != new_end:
 
        raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
    return chunks, added, deleted
 

	
 
# Used for inline highlighter word split, must match the substitutions in _escaper
 
_token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 

	
 
def _highlight_inline_diff(old, new):
 
    """
 
    Highlight simple add/remove in two lines given as info dicts. They are
 
    modified in place and given markup with <del>/<ins>.
 
    """
 
    assert old['action'] == 'del'
 
    assert new['action'] == 'add'
 

	
 
    oldwords = _token_re.split(old['line'])
 
    newwords = _token_re.split(new['line'])
 
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
    oldfragments, newfragments = [], []
 
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
        oldfrag = ''.join(oldwords[i1:i2])
 
        newfrag = ''.join(newwords[j1:j2])
 
        if tag != 'equal':
 
            if oldfrag:
 
                oldfrag = '<del>%s</del>' % oldfrag
 
            if newfrag:
 
                newfrag = '<ins>%s</ins>' % newfrag
 
        oldfragments.append(oldfrag)
 
        newfragments.append(newfrag)
 

	
 
    old['line'] = "".join(oldfragments)
 
    new['line'] = "".join(newfragments)
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -202,113 +202,113 @@ DIFF_FIXTURES = {
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/git/repository.py', 'modified',
 
         {'added': 46,
 
          'deleted': 15,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/hg.py', 'modified',
 
         {'added': 22,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_git.py', 'modified',
 
         {'added': 5,
 
          'deleted': 5,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_repository.py', 'modified',
 
         {'added': 174,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_modify_binary_file.diff': [
 
        ('file.name', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'modified',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'modified',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_rename_space_cr.diff': [
 
        ('oh yes', 'renamed',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
}
 

	
 

	
 
class TestDiffLib(base.TestController):
 

	
 
    @base.parametrize('diff_fixture', DIFF_FIXTURES)
 
    def test_diff(self, diff_fixture):
 
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
 
        vcs = 'hg'
 
        if diff_fixture.startswith('git_'):
 
            vcs = 'git'
 
        diff_processor = DiffProcessor(raw_diff, vcs=vcs)
 
        data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
 
        expected_data = DIFF_FIXTURES[diff_fixture]
 
        assert expected_data == data
 

	
 
    def test_diff_markup(self):
 
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
 
        diff_processor = DiffProcessor(raw_diff)
 
        chunks = diff_processor.parsed[0]['chunks']
 
        assert not chunks[0]
 
        #from pprint import pprint; pprint(chunks[1])
 
        l = ['\n']
 
        for d in chunks[1]:
 
            l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
 
        s = ''.join(l)
 
        assert s == r'''
 
context ... ... '@@ -51,6 +51,13 @@\n'
 
context         '@@ -51,6 +51,13 @@\n'
 
unmod    51  51 '<u>\t</u>begin();\n'
 
unmod    52  52 '<u>\t</u>\n'
 
add      53     '<u>\t</u>int foo;<u class="cr"></u>\n'
 
add      54     '<u>\t</u>int bar; <u class="cr"></u>\n'
 
add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
 
add      56     '<u>\t</u>int space; <i></i>'
 
add      57     '<u>\t</u>int tab;<u>\t</u>\n'
 
add      58     '<u>\t</u>\n'
 
unmod    59  53 ' <i></i>'
 
del          54 '<u>\t</u>#define MAX_STEPS (48)\n'
 
add      60     '<u>\t</u><u class="cr"></u>\n'
 
add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
 
unmod    62  55 '\n'
 
del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
 
add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
 
'''
0 comments (0 inline, 0 general)