kallithea Changeset - 7d0476e1f1dc

Changeset - 7d0476e1f1dc

Parent rev.

Child rev.

[Not reviewed]

beta

0 1 0

Marcin Kuzminski - 13 years ago 2012-12-11 01:15:30
marcin@python-works.com

fixes issue #678 Incorrect diff markup when diff contains >, <, or & symbols
- the \W-based split regex broke escaped entities such as &amp; into 3 tokens, which corrupted the escaping

1 file changed with 5 insertions and 3 deletions:

rhodecode/lib/diffs.py

0 comments (0 inline, 0 general)

rhodecode/lib/diffs.py

➞

Show inline comments

@@ @@ -100,276 +100,278 @@ def wrapped_diff(filenode_old, filenode_ @@
             diff = wrap_to_table(_('No changes detected'))
     cs1 = filenode_old.changeset.raw_id
     cs2 = filenode_new.changeset.raw_id
     return size, cs1, cs2, diff, stats
 def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
     """
     Returns git style diff between given ``filenode_old`` and ``filenode_new``.

     :param filenode_old: file node of the old side of the diff
     :param filenode_new: file node of the new side of the diff; its
         changeset's repository and its path are used to produce the diff
     :param ignore_whitespace: ignore whitespaces in diff
     :param context: number of context lines, any falsy value falls back to 3
     :returns: the result of ``repo.get_diff``, or ``''`` when either node
         is a ``SubModuleNode``
     :raises VCSError: when either node is not a ``FileNode``
     """
     # make sure we pass in default context
     context = context or 3

     # any() states the intent directly and does not depend on filter()
     # returning a list (a lazy filter object would always be truthy)
     if any(isinstance(node, SubModuleNode)
            for node in (filenode_new, filenode_old)):
         return ''

     for filenode in (filenode_old, filenode_new):
         if not isinstance(filenode, FileNode):
             raise VCSError("Given object should be FileNode object, not %s"
                 % filenode.__class__)

     repo = filenode_new.changeset.repository
     # a changeset without ``raw_id`` is replaced by the repo's empty changeset
     old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
     new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)

     return repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
                          ignore_whitespace, context)
 # Numeric codes for file node operations; judging by the names they stand
 # for new / deleted / modified / renamed / mode-changed files.
 (NEW_FILENODE,
  DEL_FILENODE,
  MOD_FILENODE,
  RENAMED_FILENODE,
  CHMOD_FILENODE) = range(1, 6)
 class DiffLimitExceeded(Exception):
     """Raised when the amount of diff processed so far exceeds the diff limit."""
 class LimitedDiffContainer(object):
     """
     Iterable wrapper around a diff that also carries the ``diff_limit`` and
     ``cur_diff_size`` values it was created with.
     """

     def __init__(self, diff_limit, cur_diff_size, diff):
         self.diff_limit = diff_limit
         self.cur_diff_size = cur_diff_size
         self.diff = diff

     def __iter__(self):
         # generator: entries of the wrapped diff are passed through unchanged
         for entry in self.diff:
             yield entry
 class DiffProcessor(object):
     """
     Give it a unified or git diff and it returns a list of the files that were
     mentioned in the diff together with a dict of meta information that
     can be used to render it in a HTML template.
     """
     # Hunk header of a unified diff, e.g. ``@@ -1,5 +1,6 @@ trailing text``.
     # Groups: old start, optional old length, new start, optional new length
     # and whatever follows the closing ``@@``.
     _chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
     # Matches the "\ No newline at end of file" marker line.
     _newline_marker = re.compile(r'^\\ No newline at end of file')
     # Per-file header used when vcs == 'git'. The "diff --git" prefix is
     # commented out inside the verbose pattern, so matching starts at
     # " a/<path> b/<path>\n". Every section after that first line is optional.
     # Here the similarity index and the rename from/to lines form ONE optional
     # group. The pattern exposes 14 named groups (see _get_header).
     _git_header_re = re.compile(r"""
         #^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
            ^rename[ ]from[ ](?P<rename_from>\S+)\n
            ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
         (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
         (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
         (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
             \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
         (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     # Per-file header used when vcs == 'hg'. Same named groups as the git
     # variant; the difference is that the similarity index line is its own
     # optional group, separate from the optional rename from/to pair.
     _hg_header_re = re.compile(r"""
         #^diff[ ]--git
             [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
         (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
         (?:^rename[ ]from[ ](?P<rename_from>\S+)\n
            ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
         (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
            ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
         (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
         (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
         (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
             \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
         (?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
         (?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
     """, re.VERBOSE | re.MULTILINE)
     # Used for the inline highlighter word split. A separator is either one of
     # the escaped entities &gt; / &lt; / &amp; (kept as a single token) or a
     # single non-word character (``\W+?`` is non-greedy and nothing follows
     # it). Both groups are capturing, so split() also returns the separators,
     # each preceded by the empty string captured by the first group.
     _token_re = re.compile(r'()(&gt;|&lt;|&amp;|\W+?)')
     def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
         """
         :param diff:   a text in diff format
         :param vcs: type of version controll hg or git
         :param format: format of diff passed, `udiff` or `gitdiff`
         :param diff_limit: define the size of diff that is considered "big"
             based on that parameter cut off will be triggered, set to None
             to show full diff
         """
         if not isinstance(diff, basestring):
             raise Exception('Diff must be a basestring got %s instead' % type(diff))
         self._diff = diff
         self._format = format
         self.adds = 0
         self.removes = 0
         # calculate diff size
         self.diff_size = len(diff)
         self.diff_limit = diff_limit
         self.cur_diff_size = 0
         self.parsed = False
         self.parsed_diff = []
         self.vcs = vcs
         if format == 'gitdiff':
             self.differ = self._highlight_line_difflib
             self._parser = self._parse_gitdiff
         else:
             self.differ = self._highlight_line_udiff
             self._parser = self._parse_udiff
     def _copy_iterator(self):
         """
         make a fresh copy of generator, we should not iterate thru
         an original as it's needed for repeating operations on
         this instance of DiffProcessor
         """
         self.__udiff, iterator_copy = tee(self.__udiff)
         return iterator_copy
     def _escaper(self, string):
         """
         Escape the HTML special chars of one diff line and enforce the diff
         limit.

         :param string: raw line of the diff
         :returns: the line passed through ``safe_unicode`` with ``&``, ``<``
             and ``>`` replaced by their entities
         :raises DiffLimitExceeded: when the accumulated size of all lines
             seen so far is bigger than ``self.diff_limit``
         """
         # the escaper runs once per consumed line, so the running total
         # tells whether the lines seen so far exceed the limit
         self.cur_diff_size += len(string)
         limit = self.diff_limit
         if limit is not None and self.cur_diff_size > limit:
             raise DiffLimitExceeded('Diff Limit Exceeded')

         # '&' has to go first, otherwise the entities produced for '<' and
         # '>' would get escaped a second time
         replacements = (
             ('&', '&amp;'),
             ('<', '&lt;'),
             ('>', '&gt;'),
         )
         escaped = safe_unicode(string)
         for char, entity in replacements:
             escaped = escaped.replace(char, entity)
         return escaped
     def _line_counter(self, l):
         """
         Bump the total adds/removes of this diff according to one line.

         Lines starting with ``+++`` or ``---`` are not counted.

         :param l: a line of the diff
         :returns: the line passed through ``safe_unicode``
         """
         is_added = l.startswith('+') and not l.startswith('+++')
         is_removed = l.startswith('-') and not l.startswith('---')
         if is_added:
             self.adds += 1
         elif is_removed:
             self.removes += 1
         return safe_unicode(l)
     def _highlight_line_difflib(self, line, next_):
         """
         Highlight inline changes in both lines.
         """
         if line['action'] == 'del':
             old, new = line, next_
         else:
             old, new = next_, line
         oldwords = re.split(r'(\W)', old['line'])
         newwords = re.split(r'(\W)', new['line'])
         oldwords = self._token_re.split(old['line'])
         newwords = self._token_re.split(new['line'])
         sequence = difflib.SequenceMatcher(None, oldwords, newwords)
         oldfragments, newfragments = [], []
         for tag, i1, i2, j1, j2 in sequence.get_opcodes():
             oldfrag = ''.join(oldwords[i1:i2])
             newfrag = ''.join(newwords[j1:j2])
             if tag != 'equal':
                 if oldfrag:
                     oldfrag = '<del>%s</del>' % oldfrag
                 if newfrag:
                     newfrag = '<ins>%s</ins>' % newfrag
             oldfragments.append(oldfrag)
             newfragments.append(newfrag)
         old['line'] = "".join(oldfragments)
         new['line'] = "".join(newfragments)
     def _highlight_line_udiff(self, line, next_):
         """
         Highlight inline changes in both lines.
         """
         start = 0
         limit = min(len(line['line']), len(next_['line']))
         while start < limit and line['line'][start] == next_['line'][start]:
             start += 1
         end = -1
         limit -= start
         while -end <= limit and line['line'][end] == next_['line'][end]:
             end -= 1
         end += 1
         if start or end:
             def do(l):
                 last = end + len(l['line'])
                 if l['action'] == 'add':
                     tag = 'ins'
                 else:
                     tag = 'del'
                 l['line'] = '%s<%s>%s</%s>%s' % (
                     l['line'][:start],
                     tag,
                     l['line'][start:last],
                     tag,
                     l['line'][last:]
+                )
             do(line)
             do(next_)
     def _get_header(self, diff_chunk):
         """
         Parse the header of one per-file diff chunk.

         The header pattern is chosen by ``self.vcs`` (``git`` or ``hg``).
         The returned parts consist of 14 elements::

             a_path, b_path, similarity_index, rename_from, rename_to,
             old_mode, new_mode, new_file_mode, deleted_file_mode,
             a_blob_id, b_blob_id, b_mode, a_file, b_file

         :param diff_chunk: text of the chunk, starting right after the
             ``diff --git`` marker
         :returns: tuple of (dict of the header parts, iterator over the
             remaining lines, each passed through ``self._escaper``)
         :raises Exception: when ``self.vcs`` is neither ``git`` nor ``hg``
         """
         if self.vcs == 'git':
             header_re = self._git_header_re
         elif self.vcs == 'hg':
             header_re = self._hg_header_re
         else:
             raise Exception('VCS type %s is not supported' % self.vcs)

         # NOTE(review): ``match`` is None when the chunk does not fit the
         # header pattern, which surfaces as an AttributeError below
         match = header_re.match(diff_chunk)
         leftover = diff_chunk[match.end():]
         # line endings are kept so that the escaped lines can be re-joined
         return match.groupdict(), imap(self._escaper, leftover.splitlines(True))
     def _clean_line(self, line, command):
         if command in ['+', '-', ' ']:
             #only modify the line if it's actually a diff thing
             line = line[1:]
         return line
     def _parse_gitdiff(self, inline_diff=True):
         _files = []
         diff_container = lambda arg: arg
         ##split the diff in chunks of separate --git a/file b/file chunks
         for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
             binary = False
             binary_msg = 'unknown binary'
             head, diff = self._get_header(raw_diff)
             if not head['a_file'] and head['b_file']:
                 op = 'A'
             elif head['a_file'] and head['b_file']:
                 op = 'M'
             elif head['a_file'] and not head['b_file']:
                 op = 'D'
             else:
                 #probably we're dealing with a binary file 1
                 binary = True

0 comments (0 inline, 0 general)