# -*- coding: utf-8 -*-
"""
rhodecode.lib.diffs
~~~~~~~~~~~~~~~~~~~
Set of diffing helpers, previously part of vcs
:created_on: Dec 4, 2011
:author: marcink
:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
:original copyright: 2007-2008 by Armin Ronacher
:license: GPLv3, see COPYING for more details.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
import difflib
from itertools import tee, imap
from mercurial.match import match
from vcs.exceptions import VCSError
from vcs.nodes import FileNode
import markupsafe
def get_gitdiff(filenode_old, filenode_new, ignore_whitespace=True, context=3):
Returns git style diff between given ``filenode_old`` and ``filenode_new``.
:param ignore_whitespace: ignore whitespaces in diff
for filenode in (filenode_old, filenode_new):
if not isinstance(filenode, FileNode):
raise VCSError("Given object should be FileNode object, not %s"
% filenode.__class__)
old_raw_id = getattr(filenode_old.changeset, 'raw_id', '0' * 40)
new_raw_id = getattr(filenode_new.changeset, 'raw_id', '0' * 40)
repo = filenode_new.changeset.repository
vcs_gitdiff = repo._get_diff(old_raw_id, new_raw_id, filenode_new.path,
ignore_whitespace, context)
return vcs_gitdiff
class DiffProcessor(object):
Give it a unified diff and it returns a list of the files that were
mentioned in the diff together with a dict of meta information that
can be used to render it in a HTML template.
_chunk_re = re.compile(r'@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
def __init__(self, diff, differ='diff', format='udiff'):
:param diff: a text in diff format or generator
:param format: format of diff passed, `udiff` or `gitdiff`
if isinstance(diff, basestring):
diff = [diff]
self.__udiff = diff
self.__format = format
self.adds = 0
self.removes = 0
if isinstance(self.__udiff, basestring):
self.lines = iter(self.__udiff.splitlines(1))
elif self.__format == 'gitdiff':
udiff_copy = self.copy_iterator()
self.lines = imap(self.escaper, self._parse_gitdiff(udiff_copy))
else:
self.lines = imap(self.escaper, udiff_copy)
# Select a differ.
if differ == 'difflib':
self.differ = self._highlight_line_difflib
self.differ = self._highlight_line_udiff
def escaper(self, string):
return string.replace('<', '<').replace('>', '>')
return markupsafe.escape(string)
def copy_iterator(self):
make a fresh copy of generator, we should not iterate thru
an original as it's needed for repeating operations on
this instance of DiffProcessor
self.__udiff, iterator_copy = tee(self.__udiff)
return iterator_copy
def _extract_rev(self, line1, line2):
Extract the filename and revision hint from a line.
try:
if line1.startswith('--- ') and line2.startswith('+++ '):
l1 = line1[4:].split(None, 1)
old_filename = l1[0].lstrip('a/') if len(l1) >= 1 else None
old_rev = l1[1] if len(l1) == 2 else 'old'
l2 = line2[4:].split(None, 1)
new_filename = l2[0].lstrip('b/') if len(l1) >= 1 else None
new_rev = l2[1] if len(l2) == 2 else 'new'
filename = old_filename if (old_filename !=
'dev/null') else new_filename
return filename, new_rev, old_rev
except (ValueError, IndexError):
pass
return None, None, None
def _parse_gitdiff(self, diffiterator):
def line_decoder(l):
if l.startswith('+') and not l.startswith('+++'):
self.adds += 1
elif l.startswith('-') and not l.startswith('---'):
self.removes += 1
return l.decode('utf8', 'replace')
output = list(diffiterator)
size = len(output)
if size == 2:
l = []
l.extend([output[0]])
l.extend(output[1].splitlines(1))
return map(line_decoder, l)
elif size == 1:
return map(line_decoder, output[0].splitlines(1))
elif size == 0:
return []
raise Exception('wrong size of diff %s' % size)
def _highlight_line_difflib(self, line, next):
def _highlight_line_difflib(self, line, next_):
Highlight inline changes in both lines.
if line['action'] == 'del':
old, new = line, next
old, new = line, next_
old, new = next, line
old, new = next_, line
oldwords = re.split(r'(\W)', old['line'])
newwords = re.split(r'(\W)', new['line'])
sequence = difflib.SequenceMatcher(None, oldwords, newwords)
oldfragments, newfragments = [], []
for tag, i1, i2, j1, j2 in sequence.get_opcodes():
oldfrag = ''.join(oldwords[i1:i2])
newfrag = ''.join(newwords[j1:j2])
if tag != 'equal':
if oldfrag:
oldfrag = '<del>%s</del>' % oldfrag
if newfrag:
newfrag = '<ins>%s</ins>' % newfrag
oldfragments.append(oldfrag)
newfragments.append(newfrag)
old['line'] = "".join(oldfragments)
new['line'] = "".join(newfragments)
def _highlight_line_udiff(self, line, next):
def _highlight_line_udiff(self, line, next_):
start = 0
limit = min(len(line['line']), len(next['line']))
while start < limit and line['line'][start] == next['line'][start]:
limit = min(len(line['line']), len(next_['line']))
while start < limit and line['line'][start] == next_['line'][start]:
start += 1
end = -1
limit -= start
while -end <= limit and line['line'][end] == next['line'][end]:
while -end <= limit and line['line'][end] == next_['line'][end]:
end -= 1
end += 1
if start or end:
def do(l):
last = end + len(l['line'])
if l['action'] == 'add':
tag = 'ins'
tag = 'del'
l['line'] = '%s<%s>%s</%s>%s' % (
l['line'][:start],
tag,
l['line'][start:last],
l['line'][last:]
)
do(line)
do(next)
do(next_)
def _parse_udiff(self):
Parse the diff an return data for the template.
lineiter = self.lines
files = []
line = lineiter.next()
# skip first context
skipfirst = True
while 1:
# continue until we found the old file
if not line.startswith('--- '):
continue
chunks = []
filename, old_rev, new_rev = \
self._extract_rev(line, lineiter.next())
files.append({
'filename': filename,
'old_revision': old_rev,
'new_revision': new_rev,
'chunks': chunks
})
while line:
match = self._chunk_re.match(line)
if not match:
break
lines = []
chunks.append(lines)
old_line, old_end, new_line, new_end = \
[int(x or 1) for x in match.groups()[:-1]]
old_line -= 1
new_line -= 1
context = len(match.groups()) == 5
old_end += old_line
new_end += new_line
if context:
if not skipfirst:
lines.append({
'old_lineno': '...',
'new_lineno': '...',
'action': 'context',
'line': line,
skipfirst = False
while old_line < old_end or new_line < new_end:
if line:
command, line = line[0], line[1:]
command = ' '
affects_old = affects_new = False
# ignore those if we don't expect them
if command in '#@':
elif command == '+':
affects_new = True
action = 'add'
elif command == '-':
affects_old = True
action = 'del'
affects_old = affects_new = True
action = 'unmod'
old_line += affects_old
new_line += affects_new
'old_lineno': affects_old and old_line or '',
'new_lineno': affects_new and new_line or '',
'action': action,
'line': line
except StopIteration:
# highlight inline changes
for file in files:
for _ in files:
for chunk in chunks:
lineiter = iter(chunk)
#first = True
if line['action'] != 'unmod':
nextline = lineiter.next()
if nextline['action'] == 'unmod' or \
nextline['action'] == line['action']:
self.differ(line, nextline)
return files
def prepare(self):
Prepare the passed udiff for HTML rendering. It'l return a list
of dicts
return self._parse_udiff()
def _safe_id(self, idstring):
"""Make a string safe for including in an id attribute.
The HTML spec says that id attributes 'must begin with
a letter ([A-Za-z]) and may be followed by any number
of letters, digits ([0-9]), hyphens ("-"), underscores
("_"), colons (":"), and periods (".")'. These regexps
are slightly over-zealous, in that they remove colons
and periods unnecessarily.
Whitespace is transformed into underscores, and then
anything which is not a hyphen or a character that
matches \w (alphanumerics and underscore) is removed.
# Transform all whitespace to underscore
idstring = re.sub(r'\s', "_", '%s' % idstring)
# Remove everything that is not a hyphen or a member of \w
idstring = re.sub(r'(?!-)\W', "", idstring).lower()
return idstring
def raw_diff(self):
Returns raw string as udiff
@@ -169,211 +169,223 @@ class CodeHtmlFormatter(HtmlFormatter):
lines.append('<span class="special">%*d</span>' % (mw, i))
if aln:
lines.append('<a href="#%s%d">%*d</a>' % (la, i, mw, i))
lines.append('%*d' % (mw, i))
lines.append('')
ls = '\n'.join(lines)
for i in range(fl, fl + lncount):
if i % st == 0:
# in case you wonder about the seemingly redundant <div> here: since the
# content in the other cell also is wrapped in a div, some browsers in
# some configurations seem to mess up the formatting...
if nocls:
yield 0, ('<table class="%stable">' % self.cssclass +
'<tr><td><div class="linenodiv" '
'style="background-color: #f0f0f0; padding-right: 10px">'
'<pre style="line-height: 125%">' +
ls + '</pre></div></td><td id="hlcode" class="code">')
'<tr><td class="linenos"><div class="linenodiv"><pre>' +
yield 0, dummyoutfile.getvalue()
yield 0, '</td></tr></table>'
def pygmentize(filenode, **kwargs):
"""pygmentize function using pygments
:param filenode:
return literal(code_highlight(filenode.content,
filenode.lexer, CodeHtmlFormatter(**kwargs)))
def pygmentize_annotation(repo_name, filenode, **kwargs):
"""pygmentize function for annotation
pygmentize function for annotation
color_dict = {}
def gen_color(n=10000):
"""generator for getting n of evenly distributed colors using
hsv color and golden ratio. It always return same order of colors
:returns: RGB tuple
def hsv_to_rgb(h, s, v):
if s == 0.0: return v, v, v
i = int(h * 6.0) # XXX assume int() truncates!
if s == 0.0:
return v, v, v
f = (h * 6.0) - i
p = v * (1.0 - s)
q = v * (1.0 - s * f)
t = v * (1.0 - s * (1.0 - f))
i = i % 6
if i == 0: return v, t, p
if i == 1: return q, v, p
if i == 2: return p, v, t
if i == 3: return p, q, v
if i == 4: return t, p, v
if i == 5: return v, p, q
if i == 0:
return v, t, p
if i == 1:
return q, v, p
if i == 2:
return p, v, t
if i == 3:
return p, q, v
if i == 4:
return t, p, v
if i == 5:
return v, p, q
golden_ratio = 0.618033988749895
h = 0.22717784590367374
for _ in xrange(n):
h += golden_ratio
h %= 1
HSV_tuple = [h, 0.95, 0.95]
RGB_tuple = hsv_to_rgb(*HSV_tuple)
yield map(lambda x:str(int(x * 256)), RGB_tuple)
yield map(lambda x: str(int(x * 256)), RGB_tuple)
cgenerator = gen_color()
def get_color_string(cs):
if color_dict.has_key(cs):
if cs in color_dict:
col = color_dict[cs]
col = color_dict[cs] = cgenerator.next()
return "color: rgb(%s)! important;" % (', '.join(col))
def url_func(repo_name):
def _url_func(changeset):
author = changeset.author
date = changeset.date
message = tooltip(changeset.message)
tooltip_html = ("<div style='font-size:0.8em'><b>Author:</b>"
" %s<br/><b>Date:</b> %s</b><br/><b>Message:"
"</b> %s<br/></div>")
tooltip_html = tooltip_html % (author, date, message)
lnk_format = '%5s:%s' % ('r%s' % changeset.revision,
short_id(changeset.raw_id))
uri = link_to(
lnk_format,
url('changeset_home', repo_name=repo_name,
revision=changeset.raw_id),
style=get_color_string(changeset.raw_id),
class_='tooltip',
title=tooltip_html
uri += '\n'
return uri
return _url_func
return literal(annotate_highlight(filenode, url_func(repo_name), **kwargs))
def is_following_repo(repo_name, user_id):
from rhodecode.model.scm import ScmModel
return ScmModel().is_following_repo(repo_name, user_id)
flash = _Flash()
#==============================================================================
# SCM FILTERS available via h.
from vcs.utils import author_name, author_email
from rhodecode.lib import credentials_filter, age as _age
from rhodecode.model.db import User
age = lambda x:_age(x)
age = lambda x: _age(x)
capitalize = lambda x: x.capitalize()
email = author_email
short_id = lambda x: x[:12]
hide_credentials = lambda x: ''.join(credentials_filter(x))
def email_or_none(author):
_email = email(author)
if _email != '':
return _email
# See if it contains a username we can get an email from
user = User.get_by_username(author_name(author), case_insensitive=True,
cache=True)
if user is not None:
return user.email
# No valid email, not a valid user in the system, none!
return None
def person(author):
# attr to return from fetched user
person_getter = lambda usr: usr.username
# Valid email in the attribute passed, see if they're in the system
user = User.get_by_email(_email, case_insensitive=True, cache=True)
return person_getter(user)
# Maybe it's a username?
_author = author_name(author)
user = User.get_by_username(_author, case_insensitive=True,
# Still nothing? Just pass back the author name then
return _author
def bool2icon(value):
"""Returns True/False values represented as small html image of true/false
icons
:param value: bool value
if value is True:
return HTML.tag('img', src=url("/images/icons/accept.png"),
alt=_('True'))
if value is False:
return HTML.tag('img', src=url("/images/icons/cancel.png"),
alt=_('False'))
return value
def action_parser(user_log, feed=False):
"""This helper will action_map the specified string action into translated
fancy names with icons and links
:param user_log: user log instance
:param feed: use output for feeds (no html and fancy icons)
action = user_log.action
action_params = ' '
x = action.split(':')
Status change: