kallithea Changeset - 53142fd5af4e

Changeset - 53142fd5af4e

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Thomas De Schampheleire - 5 years ago 2020-10-19 12:47:50
thomas.de_schampheleire@nokia.com

Grafted from: 59f33539b8ea

lib/diffs: make sure that trailing tabs are indicated

Between the initial submission and the final version of commit f79c40759d6f,
changes were made that turned out to be incorrect. Those changes assumed that
the later alternative matching trailing tabs would 'win' over the plain 'tab'
alternative. However, the Python 're' documentation says:

As the target string is scanned, REs separated by '|' are tried from
left to right. When one pattern completely matches, that branch is
accepted. This means that once A matches, B will not be tested further,
even if it would produce a longer overall match. In other words, the '|'
operator is never greedy.
https://docs.python.org/3.8/library/re.html

As a result, a trailing tab is seen as a plain tab and not highlighted in a
special way.

Unify the tab handling to make it unambiguous how they should be parsed.

The change diff mainly shows re group numbers shifting.

2 files changed with 13 insertions and 13 deletions:

kallithea/lib/diffs.py

kallithea/tests/models/test_diff_parsers.py

0 comments (0 inline, 0 general)

kallithea/lib/diffs.py

➞

Show inline comments

@@ @@ -352,255 +352,255 @@ class DiffProcessor(object): @@
                         op = 'added'
                         stats['binary'] = True
                         stats['ops'][NEW_FILENODE] = 'new file'
                     elif head['a_file'] and not head['b_file']:
                         op = 'removed'
                         stats['binary'] = True
                         stats['ops'][DEL_FILENODE] = 'deleted file'
                 # it's not ADD not DELETE
                 if op is None:
                     op = 'modified'
                     stats['binary'] = True
                     stats['ops'][MOD_FILENODE] = 'modified file'
             # a real non-binary diff
             if head['a_file'] or head['b_file']:
                 chunks, added, deleted = _parse_lines(diff_lines)
                 stats['binary'] = False
                 stats['added'] = added
                 stats['deleted'] = deleted
                 # explicit mark that it's a modified file
                 if op == 'modified':
                     stats['ops'][MOD_FILENODE] = 'modified file'
             else:  # Git binary patch (or empty diff)
                 # Git binary patch
                 if head['bin_patch']:
                     stats['ops'][BIN_FILENODE] = 'binary diff not shown'
                 chunks = []
             if op == 'removed' and chunks:
                 # a way of seeing deleted content could perhaps be nice - but
                 # not with the current UI
                 chunks = []
             chunks.insert(0, [{
                 'old_lineno': '',
                 'new_lineno': '',
                 'action':     'context',
                 'line':       msg,
                 } for _op, msg in stats['ops'].items()
                   if _op not in [MOD_FILENODE]])
             _files.append({
                 'old_filename':     head['a_path'],
                 'filename':         head['b_path'],
                 'old_revision':     head['a_blob_id'],
                 'new_revision':     head['b_blob_id'],
                 'chunks':           chunks,
                 'operation':        op,
                 'stats':            stats,
             })
         if not inline_diff:
             return _files
         # highlight inline changes when one del is followed by one add
         for diff_data in _files:
             for chunk in diff_data['chunks']:
                 lineiter = iter(chunk)
                 try:
                     peekline = next(lineiter)
                     while True:
                         # find a first del line
                         while peekline['action'] != 'del':
                             peekline = next(lineiter)
                         delline = peekline
                         peekline = next(lineiter)
                         # if not followed by add, eat all following del lines
                         if peekline['action'] != 'add':
                             while peekline['action'] == 'del':
                                 peekline = next(lineiter)
                             continue
                         # found an add - make sure it is the only one
                         addline = peekline
                         try:
                             peekline = next(lineiter)
                         except StopIteration:
                             # add was last line - ok
                             _highlight_inline_diff(delline, addline)
                             raise
                         if peekline['action'] != 'add':
                             # there was only one add line - ok
                             _highlight_inline_diff(delline, addline)
                 except StopIteration:
                     pass
         return _files
     def stat(self):
         """
         Return an ``(added, removed)`` tuple with the line counts stored on
         this instance as ``adds`` and ``removes``.
         """
         added_total = self.adds
         removed_total = self.removes
         return (added_total, removed_total)
-_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|( $)|(\t$)')
+_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')
 def _escaper(diff_line):
     r"""
     Do HTML escaping/markup of a single diff line (including first +/- column)
     >>> _escaper('foobar')
     'foobar'
     >>> _escaper('@foo & bar')
     '@foo &amp; bar'
     >>> _escaper('+foo < bar')
     '+foo &lt; bar'
     >>> _escaper('-foo > bar')
     '-foo &gt; bar'
     >>> _escaper(' <foo>')
     ' &lt;foo&gt;'
     >>> _escaper(' foo\tbar')
     ' foo<u>\t</u>bar'
     >>> _escaper(' foo\rbar\r')
     ' foo<u class="cr"></u>bar<u class="cr"></u>'
     >>> _escaper(' foo\t')
     ' foo<u>\t</u>'
+    ' foo<u>\t</u><i></i>'
     >>> _escaper(' foo ')
     ' foo <i></i>'
     >>> _escaper(' foo  ')
     ' foo  <i></i>'
     >>> _escaper(' ')
     ' '
     >>> _escaper('  ')
     '  <i></i>'
     >>> _escaper(' \t')
     ' <u>\t</u>'
+    ' <u>\t</u><i></i>'
     >>> _escaper(' \t  ')
     ' <u>\t</u>  <i></i>'
     >>> _escaper('   \t')
     '   <u>\t</u>'
+    '   <u>\t</u><i></i>'
     >>> _escaper(' \t\t  ')
     ' <u>\t</u><u>\t</u>  <i></i>'
     >>> _escaper('   \t\t')
     '   <u>\t</u><u>\t</u>'
+    '   <u>\t</u><u>\t</u><i></i>'
     >>> _escaper(' foo&bar<baz>  ')
     ' foo&amp;bar&lt;baz&gt;  <i></i>'
     """
     def substitute(m):
         groups = m.groups()
         if groups[0]:
             return '&amp;'
         if groups[1]:
             return '&lt;'
         if groups[2]:
             return '&gt;'
         if groups[3]:
             return '<u>\t</u>'  # Note: trailing tabs will get a longer match later
         if groups[4]:
             if groups[4] is not None:  # end of line
                 return '<u>\t</u><i></i>'
             return '<u>\t</u>'
         if groups[5]:
             return '<u class="cr"></u>'
-        if groups[5]:
+        if groups[6]:
             if m.start() == 0:
                 return ' '  # first column space shouldn't make empty lines show up as trailing space
             return ' <i></i>'
         if groups[6]:
             return '<u>\t</u><i></i>'
         assert False
     return _escape_re.sub(substitute, diff_line)
 # Bytes pattern for the per-file header of a Git 'diff --git' patch. After the
 # mandatory 'diff --git a/... b/...' line, each of the following parts is
 # optional: old/new mode, similarity index with rename from/to, new file mode,
 # deleted file mode, index line (blob ids and mode), 'GIT binary patch'
 # marker, and the '---' / '+++' file lines.
 # re.VERBOSE ignores whitespace in the pattern, hence '[ ]' for literal spaces.
 _git_header_re = re.compile(br"""
     ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
        ^rename[ ]from[ ](?P<rename_from>.+)\n
        ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Bytes pattern for the per-file header used when the vcs is 'hg' (the header
 # still starts with 'diff --git a/... b/...'). Compared to _git_header_re, the
 # similarity index line is optional on its own rather than tied to the
 # rename from/to pair, and 'copy from' / 'copy to' lines are recognized.
 _hg_header_re = re.compile(br"""
     ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
     (?:^rename[ ]from[ ](?P<rename_from>.+)\n
        ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
     (?:^copy[ ]from[ ](?P<copy_from>.+)\n
        ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Zero-width check made of negative lookaheads: it matches only when the input
 # does NOT start with '@', 'literal ' or 'delta '. _get_header treats a match
 # on the text following a header as a parse failure.
 _header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 def _get_header(vcs, diff_chunk):
     """
     Parse the git-style diff of a single file (header and chunks).

     `vcs` selects the header pattern ('git' or 'hg'). Returns a tuple with:

     1. A dict with the header pattern's named groups as meta info, such as:
        a_path, b_path, similarity_index, rename_from, rename_to,
        old_mode, new_mode, new_file_mode, deleted_file_mode,
        a_blob_id, b_blob_id, b_mode, a_file, b_file
        (values are passed through safe_str, or None for absent parts)
     2. An iterator yielding the lines after the header, with simple HTML markup.

     Raises Exception if the header is not recognized for the given vcs, or if
     the header is followed by content rejected by _header_next_check.
     """
     if vcs == 'git':
         header_match = _git_header_re.match(diff_chunk)
     elif vcs == 'hg':
         header_match = _hg_header_re.match(diff_chunk)
     else:
         header_match = None
     if header_match is None:
         raise Exception('diff not recognized as valid %s diff' % vcs)

     meta_info = {}
     for key, value in header_match.groupdict().items():
         meta_info[key] = safe_str(value) if value is not None else None

     header_end = header_match.end()
     rest = diff_chunk[header_end:]
     if rest:
         if _header_next_check.match(rest):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:header_end])), safe_str(bytes(rest[:1000]))))
         ends_with_newline = rest[-1:] == b'\n'
         if not ends_with_newline:
             # The diff will generally already have trailing \n (and be a memoryview). It might also be huge so we don't want to allocate it twice. But in this very rare case, we don't care.
             rest = bytes(rest) + b'\n'

     # the line iterator is created right away; escaping happens lazily per line
     raw_lines = re.finditer(br'(.*)\n', rest)
     diff_lines = (_escaper(safe_str(m.group(1))) for m in raw_lines)
     return meta_info, diff_lines
 # Hunk header such as '@@ -51,6 +51,13 @@ ...'. Groups: old start line, optional
 # old line count, new start line, optional new line count, and whatever text
 # follows the closing '@@'.
 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 # Matches a line starting with the '\ No newline at end of file' marker.
 _newline_marker = re.compile(r'^\\ No newline at end of file')
 def _parse_lines(diff_lines):
     """
     Given an iterator of diff body lines, parse them and return a dict per
     line and added/removed totals.
     """
     added = deleted = 0
     old_line = old_end = new_line = new_end = None
     chunks = []
     try:
         line = next(diff_lines)
         while True:
             lines = []
             chunks.append(lines)
             match = _chunk_re.match(line)

kallithea/tests/models/test_diff_parsers.py

➞

Show inline comments

@@ @@ -204,111 +204,111 @@ DIFF_FIXTURES = { @@
          {'added': 46,
           'deleted': 15,
           'binary': False,
           'ops': {MOD_FILENODE: 'modified file'}}),
         ('vcs/backends/hg.py', 'modified',
          {'added': 22,
           'deleted': 3,
           'binary': False,
           'ops': {MOD_FILENODE: 'modified file'}}),
         ('vcs/tests/test_git.py', 'modified',
          {'added': 5,
           'deleted': 5,
           'binary': False,
           'ops': {MOD_FILENODE: 'modified file'}}),
         ('vcs/tests/test_repository.py', 'modified',
          {'added': 174,
           'deleted': 2,
           'binary': False,
           'ops': {MOD_FILENODE: 'modified file'}}),
     ],
     'git_diff_modify_binary_file.diff': [
         ('file.name', 'modified',
          {'added': 0,
           'deleted': 0,
           'binary': True,
           'ops': {MOD_FILENODE: 'modified file',
                   BIN_FILENODE: 'binary diff not shown'}})
     ],
     'hg_diff_copy_file.diff': [
         ('file2', 'modified',
          {'added': 0,
           'deleted': 0,
           'binary': True,
           'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
     ],
     'hg_diff_copy_and_modify_file.diff': [
         ('file3', 'modified',
          {'added': 1,
           'deleted': 0,
           'binary': False,
           'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
                   MOD_FILENODE: 'modified file'}}),
     ],
     'hg_diff_copy_and_chmod_file.diff': [
         ('file4', 'modified',
          {'added': 0,
           'deleted': 0,
           'binary': True,
           'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
                   CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
     ],
     'hg_diff_copy_chmod_and_edit_file.diff': [
         ('file5', 'modified',
          {'added': 2,
           'deleted': 1,
           'binary': False,
           'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
                   CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
                   MOD_FILENODE: 'modified file'}}),
     ],
     'hg_diff_rename_space_cr.diff': [
         ('oh yes', 'renamed',
          {'added': 3,
           'deleted': 2,
           'binary': False,
           'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
     ],
+}
 class TestDiffLib(base.TestController):
     """Tests for DiffProcessor: per-file stats of fixture diffs and the HTML markup of diff lines."""

     @base.parametrize('diff_fixture', DIFF_FIXTURES)
     def test_diff(self, diff_fixture):
         """Parse one fixture and compare (filename, operation, stats) per file with DIFF_FIXTURES."""
         raw_diff = fixture.load_resource(diff_fixture, strip=False)
         # fixtures whose name starts with 'git_' are parsed as git diffs, all others as hg
         vcs = 'git' if diff_fixture.startswith('git_') else 'hg'
         diff_processor = DiffProcessor(raw_diff, vcs=vcs)
         data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
         expected_data = DIFF_FIXTURES[diff_fixture]
         assert expected_data == data

     def test_diff_markup(self):
         """Check the markup of markuptest.diff, including the <i></i> marker after trailing tabs and spaces."""
         raw_diff = fixture.load_resource('markuptest.diff', strip=False)
         diff_processor = DiffProcessor(raw_diff)
         chunks = diff_processor.parsed[0]['chunks']
         assert not chunks[0]
         # one text line per parsed diff line: action, new line number, old line number, repr of the markup
         rendered = ['\n']
         for d in chunks[1]:
             rendered.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
         s = ''.join(rendered)
         assert s == r'''
 context         '@@ -51,6 +51,13 @@'
 unmod    51  51 '<u>\t</u>begin();'
 unmod    52  52 '<u>\t</u><i></i>'
 add      53     '<u>\t</u>int foo;<u class="cr"></u>'
 add      54     '<u>\t</u>int bar; <u class="cr"></u>'
 add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>'
 add      56     '<u>\t</u>int space; <i></i>'
 add      57     '<u>\t</u>int tab;<u>\t</u><i></i>'
 add      58     '<u>\t</u><i></i>'
 unmod    59  53 ' <i></i>'
 del          54 '<u>\t</u>#define MAX_STEPS (48)'
 add      60     '<u>\t</u><u class="cr"></u>'
 add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>'
 unmod    62  55 ''
 del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)'
 add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)'
 '''

0 comments (0 inline, 0 general)