kallithea Changeset - 161cf7e3566f

Changeset - 161cf7e3566f

Parent rev.

Child rev.

[Not reviewed]

default

0 2 0

Mads Kiilerich (mads) - 5 years ago 2020-10-28 14:08:28
mads@kiilerich.com

Grafted from: 6ad8bc4c1920

diff: refactor to avoid complexity of passing unnecessary trailing newline around

_escaper tried to handle these optional trailing newlines, but did not always
preserve them. Keep it simple.

In normal operation, all diff lines end with \n. We only saw unterminated
lines in a couple of old test cases.

2 files changed with 22 insertions and 18 deletions:

kallithea/lib/diffs.py

kallithea/tests/models/test_diff_parsers.py

0 comments (0 inline, 0 general)

kallithea/lib/diffs.py

➞

Show inline comments

@@ @@ -400,97 +400,97 @@ class DiffProcessor(object): @@
                 'chunks':           chunks,
                 'operation':        op,
                 'stats':            stats,
             })
         if not inline_diff:
             return _files
         # highlight inline changes when one del is followed by one add
         for diff_data in _files:
             for chunk in diff_data['chunks']:
                 lineiter = iter(chunk)
                 try:
                     peekline = next(lineiter)
                     while True:
                         # find a first del line
                         while peekline['action'] != 'del':
                             peekline = next(lineiter)
                         delline = peekline
                         peekline = next(lineiter)
                         # if not followed by add, eat all following del lines
                         if peekline['action'] != 'add':
                             while peekline['action'] == 'del':
                                 peekline = next(lineiter)
                             continue
                         # found an add - make sure it is the only one
                         addline = peekline
                         try:
                             peekline = next(lineiter)
                         except StopIteration:
                             # add was last line - ok
                             _highlight_inline_diff(delline, addline)
                             raise
                         if peekline['action'] != 'add':
                             # there was only one add line - ok
                             _highlight_inline_diff(delline, addline)
                 except StopIteration:
                     pass
         return _files
     def stat(self):
         """
         Return the tuple ``(self.adds, self.removes)``: the added and the
         removed lines recorded for this instance.

         NOTE(review): presumably both values are line counts - confirm where
         ``adds`` / ``removes`` are assigned.
         """
         return self.adds, self.removes
-_escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( \n| $)|(\t\n|\t$)')
 _escape_re = re.compile(r'(&)|(<)|(>)|(\t)|(\r)|(?<=.)( $)|(\t$)')
 def _escaper(diff_line):
     r"""
     Do HTML escaping/markup of a single diff line (including first +/- column)
     >>> _escaper('foobar')
     'foobar'
     >>> _escaper('@foo & bar')
     '@foo &amp; bar'
     >>> _escaper('+foo < bar')
     '+foo &lt; bar'
     >>> _escaper('-foo > bar')
     '-foo &gt; bar'
     >>> _escaper(' <foo>')
     ' &lt;foo&gt;'
     >>> _escaper(' foo\tbar')
     ' foo<u>\t</u>bar'
     >>> _escaper(' foo\rbar\r')
     ' foo<u class="cr"></u>bar<u class="cr"></u>'
     >>> _escaper(' foo\t')
     ' foo<u>\t</u>'
     >>> _escaper(' foo ')
     ' foo <i></i>'
     >>> _escaper(' foo  ')
     ' foo  <i></i>'
     >>> _escaper(' ')
     ' '
     >>> _escaper('  ')
     '  <i></i>'
     >>> _escaper(' \t')
     ' <u>\t</u>'
     >>> _escaper(' \t  ')
     ' <u>\t</u>  <i></i>'
     >>> _escaper('   \t')
     '   <u>\t</u>'
     >>> _escaper(' \t\t  ')
     ' <u>\t</u><u>\t</u>  <i></i>'
     >>> _escaper('   \t\t')
     '   <u>\t</u><u>\t</u>'
     >>> _escaper(' foo&bar<baz>  ')
     ' foo&amp;bar&lt;baz&gt;  <i></i>'
     """
     def substitute(m):
         groups = m.groups()
         if groups[0]:
             return '&amp;'
@@ @@ -526,99 +526,103 @@ _git_header_re = re.compile(br""" @@
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Bytes pattern for the per-file header of a 'diff --git' style diff, used by
 # _get_header when vcs == 'hg'. The mandatory first line captures a_path and
 # b_path; every following header line (mode change, similarity index,
 # rename/copy, new/deleted file mode, index, binary patch marker, ---/+++
 # lines) is optional and captured in a named group.
 _hg_header_re = re.compile(br"""
     ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
     (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
        ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
     (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
     (?:^rename[ ]from[ ](?P<rename_from>.+)\n
        ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
     (?:^copy[ ]from[ ](?P<copy_from>.+)\n
        ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
     (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
     (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
     (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
         \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
     (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
     (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
     (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 """, re.VERBOSE | re.MULTILINE)
 # Zero-width check made of negative lookaheads only: it matches unless the
 # data starts with '@', 'literal ' or 'delta '. _get_header treats a match on
 # the data following the header as "header could not be fully parsed".
 _header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')
 def _get_header(vcs, diff_chunk):
     """
     Parse the diff of a single file (header and chunks) and return a tuple with:

     1. A dict with meta info, one entry per named group of the header
        regexp (value passed through safe_str, or None if the group did not
        match):

         a_path, b_path, similarity_index, rename_from, rename_to,
         old_mode, new_mode, new_file_mode, deleted_file_mode,
         a_blob_id, b_blob_id, b_mode, a_file, b_file

     2. An iterator yielding the lines after the header, without their
        trailing newline, with simple HTML markup applied by _escaper.

     vcs selects the header regexp: 'git' or 'hg'. diff_chunk is matched
     against a bytes pattern; per the comment in the body it is generally a
     memoryview.

     Raises Exception if the header is not recognized (including any other
     vcs value, which leaves match as None), or if the data following the
     header does not start with '@', 'literal ' or 'delta '.
     """
     match = None
     if vcs == 'git':
         match = _git_header_re.match(diff_chunk)
     elif vcs == 'hg':
         match = _hg_header_re.match(diff_chunk)
     if match is None:
         raise Exception('diff not recognized as valid %s diff' % vcs)
     meta_info = {k: None if v is None else safe_str(v) for k, v in match.groupdict().items()}
     # Everything after the matched header: the chunks of this file's diff.
     rest = diff_chunk[match.end():]
     # NOTE(review): the check and the diff_lines assignment directly below are
     # repeated / overwritten by the 'if rest:' block and the final diff_lines
     # assignment further down. This looks like the pre-change side of the
     # rendered diff shown next to the post-change side - confirm.
     if rest and _header_next_check.match(rest):
         raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
     diff_lines = (_escaper(safe_str(m.group(0))) for m in re.finditer(br'.*\n|.+$', rest)) # don't split on \r as str.splitlines do
     if rest:
         # A match here means the header was followed by something that is not
         # the start of a chunk ('@') or of binary data ('literal ' / 'delta ').
         if _header_next_check.match(rest):
             raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
         # Make sure the last line is terminated so the '(.*)\n' pattern below
         # also yields it.
         if rest[-1:] != b'\n':
             # The diff will generally already have trailing \n (and be a memoryview). It might also be huge so we don't want to allocate it twice. But in this very rare case, we don't care.
             rest = bytes(rest) + b'\n'
     # Lazily yield each line without its trailing \n; the pattern only splits
     # on \n, so any \r stays in the line and is handed to _escaper.
     diff_lines = (_escaper(safe_str(m.group(1))) for m in re.finditer(br'(.*)\n', rest))
     return meta_info, diff_lines
 # Chunk header line '@@ -A[,B] +C[,D] @@...'. Groups: A, optional B, C,
 # optional D, and whatever text follows the closing '@@'.
 _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 # Line starting with the literal marker '\ No newline at end of file'.
 _newline_marker = re.compile(r'^\\ No newline at end of file')
 def _parse_lines(diff_lines):
     """
     Given an iterator of diff body lines, parse them and return a dict per
     line and added/removed totals.
     """
     added = deleted = 0
     old_line = old_end = new_line = new_end = None
     chunks = []
     try:
         line = next(diff_lines)
         while True:
             lines = []
             chunks.append(lines)
             match = _chunk_re.match(line)
             if not match:
                 raise Exception('error parsing diff @@ line %r' % line)
             gr = match.groups()
             (old_line, old_end,
              new_line, new_end) = [int(x or 1) for x in gr[:-1]]
             old_line -= 1
             new_line -= 1
             context = len(gr) == 5
             old_end += old_line
             new_end += new_line
             if context:
                 # skip context only if it's first line
                 if int(gr[0]) > 1:
                     lines.append({
                         'old_lineno': '',
                         'new_lineno': '',
                         'action':     'context',
                         'line':       line,
                     })

kallithea/tests/models/test_diff_parsers.py

➞

Show inline comments

@@ @@ -250,65 +250,65 @@ DIFF_FIXTURES = { @@
           'deleted': 0,
           'binary': True,
           'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
                   CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
     ],
     'hg_diff_copy_chmod_and_edit_file.diff': [
         ('file5', 'modified',
          {'added': 2,
           'deleted': 1,
           'binary': False,
           'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
                   CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
                   MOD_FILENODE: 'modified file'}}),
     ],
     'hg_diff_rename_space_cr.diff': [
         ('oh yes', 'renamed',
          {'added': 3,
           'deleted': 2,
           'binary': False,
           'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
     ],
+}
 class TestDiffLib(base.TestController):
     @base.parametrize('diff_fixture', DIFF_FIXTURES)
     def test_diff(self, diff_fixture):
         """
         Parse one diff fixture and compare the (filename, operation, stats)
         of every parsed file with the expectation in DIFF_FIXTURES.
         """
         raw_diff = fixture.load_resource(diff_fixture, strip=False)
         # Fixtures are treated as Mercurial diffs unless the resource name
         # starts with 'git_'.
         vcs = 'hg'
         if diff_fixture.startswith('git_'):
             vcs = 'git'
         diff_processor = DiffProcessor(raw_diff, vcs=vcs)
         data = [(x['filename'], x['operation'], x['stats']) for x in diff_processor.parsed]
         expected_data = DIFF_FIXTURES[diff_fixture]
         assert expected_data == data
     def test_diff_markup(self):
         """
         Parse 'markuptest.diff' and compare a one-line-per-entry text dump of
         the second chunk of the first file with the expected markup.
         """
         raw_diff = fixture.load_resource('markuptest.diff', strip=False)
         diff_processor = DiffProcessor(raw_diff)
         chunks = diff_processor.parsed[0]['chunks']
         # The first chunk of the first file is expected to be empty.
         assert not chunks[0]
         #from pprint import pprint; pprint(chunks[1])
         # Dump each line dict as 'action new_lineno old_lineno repr(line)';
         # the leading '\n' matches the newline right after the opening r'''.
         l = ['\n']
         for d in chunks[1]:
             l.append('%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n' % d)
         s = ''.join(l)
         # NOTE(review): the expected text below lists several entries twice,
         # once with a trailing \n inside the repr and once without. That looks
         # like the old and new side of the rendered diff interleaved - confirm
         # against the real test file before relying on it.
         assert s == r'''
 context         '@@ -51,6 +51,13 @@\n'
 unmod    51  51 '<u>\t</u>begin();\n'
 unmod    52  52 '<u>\t</u>\n'
 add      53     '<u>\t</u>int foo;<u class="cr"></u>\n'
 add      54     '<u>\t</u>int bar; <u class="cr"></u>\n'
 add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>\n'
 context         '@@ -51,6 +51,13 @@'
 unmod    51  51 '<u>\t</u>begin();'
 unmod    52  52 '<u>\t</u>'
 add      53     '<u>\t</u>int foo;<u class="cr"></u>'
 add      54     '<u>\t</u>int bar; <u class="cr"></u>'
 add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>'
 add      56     '<u>\t</u>int space; <i></i>'
 add      57     '<u>\t</u>int tab;<u>\t</u>\n'
 add      58     '<u>\t</u>\n'
 add      57     '<u>\t</u>int tab;<u>\t</u>'
 add      58     '<u>\t</u>'
 unmod    59  53 ' <i></i>'
 del          54 '<u>\t</u>#define MAX_STEPS (48)\n'
 add      60     '<u>\t</u><u class="cr"></u>\n'
 add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>\n'
 unmod    62  55 '\n'
 del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)\n'
 add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)\n'
 del          54 '<u>\t</u>#define MAX_STEPS (48)'
 add      60     '<u>\t</u><u class="cr"></u>'
 add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>'
 unmod    62  55 ''
 del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)'
 add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)'
 '''

0 comments (0 inline, 0 general)