@@ -100,276 +100,278 @@ def wrapped_diff(filenode_old, filenode_
diff = wrap_to_table(_('No changes detected'))
cs1 = filenode_old.changeset.raw_id
cs2 = filenode_new.changeset.raw_id
return size, cs1, cs2, diff, stats
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
"""
Returns git style diff between given ``filenode_old`` and ``filenode_new``.
:param ignore_whitespace: ignore whitespaces in diff
# make sure we pass in default context
context = context or 3
submodules = filter(lambda o: isinstance(o, SubModuleNode),
[filenode_new, filenode_old])
if submodules:
return ''
for filenode in (filenode_old, filenode_new):
if not isinstance(filenode, FileNode):
raise VCSError("Given object should be FileNode object, not %s"
% filenode.__class__)
repo = filenode_new.changeset.repository
old_raw_id = getattr(filenode_old.changeset, 'raw_id', repo.EMPTY_CHANGESET)
new_raw_id = getattr(filenode_new.changeset, 'raw_id', repo.EMPTY_CHANGESET)
vcs_gitdiff = repo.get_diff(old_raw_id, new_raw_id, filenode_new.path,
ignore_whitespace, context)
return vcs_gitdiff
NEW_FILENODE = 1
DEL_FILENODE = 2
MOD_FILENODE = 3
RENAMED_FILENODE = 4
CHMOD_FILENODE = 5
class DiffLimitExceeded(Exception):
pass
class LimitedDiffContainer(object):
def __init__(self, diff_limit, cur_diff_size, diff):
self.diff = diff
self.diff_limit = diff_limit
self.cur_diff_size = cur_diff_size
def __iter__(self):
for l in self.diff:
yield l
class DiffProcessor(object):
Give it a unified or git diff and it returns a list of the files that were
mentioned in the diff together with a dict of meta information that
can be used to render it in a HTML template.
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
_newline_marker = re.compile(r'^\\ No newline at end of file')
_git_header_re = re.compile(r"""
#^diff[ ]--git
[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
^rename[ ]from[ ](?P<rename_from>\S+)\n
^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))?
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
(?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
(?:^---[ ](a/(?P<a_file>.+)|/dev/null)(?:\n|$))?
(?:^\+\+\+[ ](b/(?P<b_file>.+)|/dev/null)(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)
_hg_header_re = re.compile(r"""
(?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
(?:^rename[ ]from[ ](?P<rename_from>\S+)\n
#used for inline highlighter word split
_token_re = re.compile(r'()(>|<|&|\W+?)')
def __init__(self, diff, vcs='hg', format='gitdiff', diff_limit=None):
:param diff: a text in diff format
:param vcs: type of version controll hg or git
:param format: format of diff passed, `udiff` or `gitdiff`
:param diff_limit: define the size of diff that is considered "big"
based on that parameter cut off will be triggered, set to None
to show full diff
if not isinstance(diff, basestring):
raise Exception('Diff must be a basestring got %s instead' % type(diff))
self._diff = diff
self._format = format
self.adds = 0
self.removes = 0
# calculate diff size
self.diff_size = len(diff)
self.cur_diff_size = 0
self.parsed = False
self.parsed_diff = []
self.vcs = vcs
if format == 'gitdiff':
self.differ = self._highlight_line_difflib
self._parser = self._parse_gitdiff
else:
self.differ = self._highlight_line_udiff
self._parser = self._parse_udiff
def _copy_iterator(self):
make a fresh copy of generator, we should not iterate thru
an original as it's needed for repeating operations on
this instance of DiffProcessor
self.__udiff, iterator_copy = tee(self.__udiff)
return iterator_copy
def _escaper(self, string):
Escaper for diff escapes special chars and checks the diff limit
:param string:
:type string:
self.cur_diff_size += len(string)
# escaper get's iterated on each .next() call and it checks if each
# parsed line doesn't exceed the diff limit
if self.diff_limit is not None and self.cur_diff_size > self.diff_limit:
raise DiffLimitExceeded('Diff Limit Exceeded')
return safe_unicode(string).replace('&', '&')\
.replace('<', '<')\
.replace('>', '>')
def _line_counter(self, l):
Checks each line and bumps total adds/removes for this diff
:param l:
if l.startswith('+') and not l.startswith('+++'):
self.adds += 1
elif l.startswith('-') and not l.startswith('---'):
self.removes += 1
return safe_unicode(l)
def _highlight_line_difflib(self, line, next_):
Highlight inline changes in both lines.
if line['action'] == 'del':
old, new = line, next_
old, new = next_, line
oldwords = re.split(r'(\W)', old['line'])
newwords = re.split(r'(\W)', new['line'])
oldwords = self._token_re.split(old['line'])
newwords = self._token_re.split(new['line'])
sequence = difflib.SequenceMatcher(None, oldwords, newwords)
oldfragments, newfragments = [], []
for tag, i1, i2, j1, j2 in sequence.get_opcodes():
oldfrag = ''.join(oldwords[i1:i2])
newfrag = ''.join(newwords[j1:j2])
if tag != 'equal':
if oldfrag:
oldfrag = '<del>%s</del>' % oldfrag
if newfrag:
newfrag = '<ins>%s</ins>' % newfrag
oldfragments.append(oldfrag)
newfragments.append(newfrag)
old['line'] = "".join(oldfragments)
new['line'] = "".join(newfragments)
def _highlight_line_udiff(self, line, next_):
start = 0
limit = min(len(line['line']), len(next_['line']))
while start < limit and line['line'][start] == next_['line'][start]:
start += 1
end = -1
limit -= start
while -end <= limit and line['line'][end] == next_['line'][end]:
end -= 1
end += 1
if start or end:
def do(l):
last = end + len(l['line'])
if l['action'] == 'add':
tag = 'ins'
tag = 'del'
l['line'] = '%s<%s>%s</%s>%s' % (
l['line'][:start],
tag,
l['line'][start:last],
l['line'][last:]
)
do(line)
do(next_)
def _get_header(self, diff_chunk):
parses the diff header, and returns parts, and leftover diff
parts consists of 14 elements::
a_path, b_path, similarity_index, rename_from, rename_to,
old_mode, new_mode, new_file_mode, deleted_file_mode,
a_blob_id, b_blob_id, b_mode, a_file, b_file
:param diff_chunk:
:type diff_chunk:
if self.vcs == 'git':
match = self._git_header_re.match(diff_chunk)
diff = diff_chunk[match.end():]
return match.groupdict(), imap(self._escaper, diff.splitlines(1))
elif self.vcs == 'hg':
match = self._hg_header_re.match(diff_chunk)
raise Exception('VCS type %s is not supported' % self.vcs)
def _clean_line(self, line, command):
if command in ['+', '-', ' ']:
#only modify the line if it's actually a diff thing
line = line[1:]
return line
def _parse_gitdiff(self, inline_diff=True):
_files = []
diff_container = lambda arg: arg
##split the diff in chunks of separate --git a/file b/file chunks
for raw_diff in ('\n' + self._diff).split('\ndiff --git')[1:]:
binary = False
binary_msg = 'unknown binary'
head, diff = self._get_header(raw_diff)
if not head['a_file'] and head['b_file']:
op = 'A'
elif head['a_file'] and head['b_file']:
op = 'M'
elif head['a_file'] and not head['b_file']:
op = 'D'
#probably we're dealing with a binary file 1
binary = True
Status change: