Changeset - fd61f678577f
[Not reviewed]
default
0 2 1
Mads Kiilerich (mads) - 5 years ago 2020-11-14 15:20:40
mads@kiilerich.com
Grafted from: 9ae72340369d
diff: improved handling of Git diffs with " quoting

Kallithea would intentionally and explicitly fail with an ugly exception when
trying to parse Git diffs with quoted filenames.

Improve this by parsing the quotes and ignoring them, as long as they
match. The content inside the quotes might be \-escaped, but that could
potentially also be the case without quoting. We will fix that later.

Adding some test cases that before would have failed to parse and raised an
exception.

Thanks to stypr of Flatt Security for raising this.
3 files changed with 112 insertions and 3 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -322,392 +322,392 @@ class DiffProcessor(object):
 
            elif head['new_file_mode']:
 
                op = 'added'
 
                stats['binary'] = True
 
                stats['ops'][NEW_FILENODE] = 'new file %s' % head['new_file_mode']
 
            else:  # modify operation, can be cp, rename, chmod
 
                # CHMOD
 
                if head['new_mode'] and head['old_mode']:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][CHMOD_FILENODE] = ('modified file chmod %s => %s'
 
                                        % (head['old_mode'], head['new_mode']))
 
                # RENAME
 
                if (head['rename_from'] and head['rename_to']
 
                      and head['rename_from'] != head['rename_to']):
 
                    op = 'renamed'
 
                    stats['binary'] = True
 
                    stats['ops'][RENAMED_FILENODE] = ('file renamed from %s to %s'
 
                                    % (head['rename_from'], head['rename_to']))
 
                # COPY
 
                if head.get('copy_from') and head.get('copy_to'):
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][COPIED_FILENODE] = ('file copied from %s to %s'
 
                                        % (head['copy_from'], head['copy_to']))
 
                # FALL BACK: detect missed old style add or remove
 
                if op is None:
 
                    if not head['a_file'] and head['b_file']:
 
                        op = 'added'
 
                        stats['binary'] = True
 
                        stats['ops'][NEW_FILENODE] = 'new file'
 

	
 
                    elif head['a_file'] and not head['b_file']:
 
                        op = 'removed'
 
                        stats['binary'] = True
 
                        stats['ops'][DEL_FILENODE] = 'deleted file'
 

	
 
                # it's not ADD not DELETE
 
                if op is None:
 
                    op = 'modified'
 
                    stats['binary'] = True
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 

	
 
            # a real non-binary diff
 
            if head['a_file'] or head['b_file']:
 
                chunks, added, deleted = _parse_lines(diff_lines)
 
                stats['binary'] = False
 
                stats['added'] = added
 
                stats['deleted'] = deleted
 
                # explicit mark that it's a modified file
 
                if op == 'modified':
 
                    stats['ops'][MOD_FILENODE] = 'modified file'
 
            else:  # Git binary patch (or empty diff)
 
                # Git binary patch
 
                if head['bin_patch']:
 
                    stats['ops'][BIN_FILENODE] = 'binary diff not shown'
 
                chunks = []
 

	
 
            if op == 'removed' and chunks:
 
                # a way of seeing deleted content could perhaps be nice - but
 
                # not with the current UI
 
                chunks = []
 

	
 
            chunks.insert(0, [{
 
                'old_lineno': '',
 
                'new_lineno': '',
 
                'action':     'context',
 
                'line':       msg,
 
                } for _op, msg in stats['ops'].items()
 
                  if _op not in [MOD_FILENODE]])
 

	
 
            _files.append({
 
                'old_filename':     head['a_path'],
 
                'filename':         head['b_path'],
 
                'old_revision':     head['a_blob_id'],
 
                'new_revision':     head['b_blob_id'],
 
                'chunks':           chunks,
 
                'operation':        op,
 
                'stats':            stats,
 
            })
 

	
 
        if not inline_diff:
 
            return _files
 

	
 
        # highlight inline changes when one del is followed by one add
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                lineiter = iter(chunk)
 
                try:
 
                    peekline = next(lineiter)
 
                    while True:
 
                        # find a first del line
 
                        while peekline['action'] != 'del':
 
                            peekline = next(lineiter)
 
                        delline = peekline
 
                        peekline = next(lineiter)
 
                        # if not followed by add, eat all following del lines
 
                        if peekline['action'] != 'add':
 
                            while peekline['action'] == 'del':
 
                                peekline = next(lineiter)
 
                            continue
 
                        # found an add - make sure it is the only one
 
                        addline = peekline
 
                        try:
 
                            peekline = next(lineiter)
 
                        except StopIteration:
 
                            # add was last line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                            raise
 
                        if peekline['action'] != 'add':
 
                            # there was only one add line - ok
 
                            _highlight_inline_diff(delline, addline)
 
                except StopIteration:
 
                    pass
 

	
 
        return _files
 

	
 
    def stat(self):
        """
        Return the ``adds`` and ``removes`` attributes of this instance as
        an ``(adds, removes)`` tuple.
        """
        adds, removes = self.adds, self.removes
        return adds, removes
 

	
 

	
 
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')
 

	
 

	
 
def _escaper(diff_line):
 
    r"""
 
    Do HTML escaping/markup of a single diff line (including first +/- column)
 

	
 
    >>> _escaper('foobar')
 
    'foobar'
 
    >>> _escaper('@foo & bar')
 
    '@foo &amp; bar'
 
    >>> _escaper('+foo < bar')
 
    '+foo &lt; bar'
 
    >>> _escaper('-foo > bar')
 
    '-foo &gt; bar'
 
    >>> _escaper(' <foo>')
 
    ' &lt;foo&gt;'
 
    >>> _escaper(' foo\tbar')
 
    ' foo<u>\t</u>bar'
 
    >>> _escaper(' foo\rbar\r')
 
    ' foo<u class="cr"></u>bar<u class="cr"></u>'
 
    >>> _escaper(' foo\t')
 
    ' foo<u>\t</u><i></i>'
 
    >>> _escaper(' foo ')
 
    ' foo <i></i>'
 
    >>> _escaper(' foo  ')
 
    ' foo  <i></i>'
 
    >>> _escaper(' ')
 
    ' '
 
    >>> _escaper('  ')
 
    '  <i></i>'
 
    >>> _escaper(' \t')
 
    ' <u>\t</u><i></i>'
 
    >>> _escaper(' \t  ')
 
    ' <u>\t</u>  <i></i>'
 
    >>> _escaper('   \t')
 
    '   <u>\t</u><i></i>'
 
    >>> _escaper(' \t\t  ')
 
    ' <u>\t</u><u>\t</u>  <i></i>'
 
    >>> _escaper('   \t\t')
 
    '   <u>\t</u><u>\t</u><i></i>'
 
    >>> _escaper(' foo&bar<baz>  ')
 
    ' foo&amp;bar&lt;baz&gt;  <i></i>'
 
    """
 

	
 
    def substitute(m):
 
        groups = m.groups()
 
        if groups[0]:
 
            return '&amp;'
 
        if groups[1]:
 
            return '&lt;'
 
        if groups[2]:
 
            return '&gt;'
 
        if groups[3]:
 
            if groups[4] is not None:  # end of line
 
                return '<u>\t</u><i></i>'
 
            return '<u>\t</u>'
 
        if groups[5]:
 
            return '<u class="cr"></u>'
 
        if groups[6]:
 
            if m.start() == 0:
 
                return ' '  # first column space shouldn't make empty lines show up as trailing space
 
            return ' <i></i>'
 
        assert False
 

	
 
    return _escape_re.sub(substitute, diff_line)
 

	
 

	
 
_git_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    ^diff[ ]--git[ ](?P<a_path_quote>"?)a/(?P<a_path>.+?)(?P=a_path_quote)[ ](?P<b_path_quote>"?)b/(?P<b_path>.+?)(?P=a_path_quote)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n
 
       ^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^---[ ](?P<a_file_quote>"?)(a/(?P<a_file>.+?)(?P=a_file_quote)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](?P<b_file_quote>"?)(b/(?P<b_file>.+?)(?P=b_file_quote)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
_hg_header_re = re.compile(br"""
 
    ^diff[ ]--git[ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n
 
    (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
 
       ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
 
    (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%(?:\n|$))?
 
    (?:^rename[ ]from[ ](?P<rename_from>.+)\n
 
       ^rename[ ]to[ ](?P<rename_to>.+)(?:\n|$))?
 
    (?:^copy[ ]from[ ](?P<copy_from>.+)\n
 
       ^copy[ ]to[ ](?P<copy_to>.+)(?:\n|$))?
 
    (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
 
    (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
 
    (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
 
        \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
 
    (?:^(?P<bin_patch>GIT[ ]binary[ ]patch)(?:\n|$))?
 
    (?:^---[ ](a/(?P<a_file>.+?)|/dev/null)\t?(?:\n|$))?
 
    (?:^\+\+\+[ ](b/(?P<b_file>.+?)|/dev/null)\t?(?:\n|$))?
 
""", re.VERBOSE | re.MULTILINE)
 

	
 

	
 
# Matches (with an empty match) when the text does NOT start with "@",
# "literal " or "delta ".
_header_next_check = re.compile(br'''(?!@)(?!literal )(?!delta )''')


def _get_header(vcs, diff_chunk):
    """
    Split the diff of a single file into its parsed header and its body lines.

    The header is matched with the pattern for ``vcs`` ('git' or 'hg').
    Returns a tuple with:

    1. A dict with one entry per named group of the header pattern, each
       value passed through safe_str, or None when the group did not match.

    2. A generator yielding the remaining lines (without line ending), each
       passed through safe_str and _escaper.

    Raises Exception if the header is not recognized (this includes any other
    value of ``vcs``), or if the text following the header does not start
    with "@", "literal " or "delta ".
    """
    if vcs == 'git':
        header_re = _git_header_re
    elif vcs == 'hg':
        header_re = _hg_header_re
    else:
        header_re = None

    match = header_re.match(diff_chunk) if header_re is not None else None
    if match is None:
        raise Exception('diff not recognized as valid %s diff: %r' % (vcs, safe_str(bytes(diff_chunk[:1000]))))

    meta_info = {}
    for key, value in match.groupdict().items():
        meta_info[key] = safe_str(value) if value is not None else None

    header_end = match.end()
    rest = diff_chunk[header_end:]
    if rest:
        if _header_next_check.match(rest):
            raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:header_end])), safe_str(bytes(rest[:1000]))))
        if rest[-1:] != b'\n':
            # The line pattern below only yields "\n"-terminated lines. Adding
            # the missing terminator copies the remaining data; the diff
            # usually ends with "\n" already, so this copy is rare.
            rest = bytes(rest) + b'\n'

    diff_lines = (_escaper(safe_str(line_match.group(1))) for line_match in re.finditer(br'(.*)\n', rest))
    return meta_info, diff_lines
 

	
 

	
 
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
_newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
 

	
 
def _parse_lines(diff_lines):
 
    """
 
    Given an iterator of diff body lines, parse them and return a dict per
 
    line and added/removed totals.
 
    """
 
    added = deleted = 0
 
    old_line = old_end = new_line = new_end = None
 

	
 
    chunks = []
 
    try:
 
        line = next(diff_lines)
 

	
 
        while True:
 
            lines = []
 
            chunks.append(lines)
 

	
 
            match = _chunk_re.match(line)
 

	
 
            if not match:
 
                raise Exception('error parsing diff @@ line %r' % line)
 

	
 
            gr = match.groups()
 
            (old_line, old_end,
 
             new_line, new_end) = [int(x or 1) for x in gr[:-1]]
 
            old_line -= 1
 
            new_line -= 1
 

	
 
            context = len(gr) == 5
 
            old_end += old_line
 
            new_end += new_line
 

	
 
            if context:
 
                # skip context only if it's first line
 
                if int(gr[0]) > 1:
 
                    lines.append({
 
                        'old_lineno': '',
 
                        'new_lineno': '',
 
                        'action':     'context',
 
                        'line':       line,
 
                    })
 

	
 
            line = next(diff_lines)
 

	
 
            while old_line < old_end or new_line < new_end:
 
                if not line:
 
                    raise Exception('error parsing diff - empty line at -%s+%s' % (old_line, new_line))
 

	
 
                affects_old = affects_new = False
 

	
 
                command = line[0]
 
                if command == '+':
 
                    affects_new = True
 
                    action = 'add'
 
                    added += 1
 
                elif command == '-':
 
                    affects_old = True
 
                    action = 'del'
 
                    deleted += 1
 
                elif command == ' ':
 
                    affects_old = affects_new = True
 
                    action = 'unmod'
 
                else:
 
                    raise Exception('error parsing diff - unknown command in line %r at -%s+%s' % (line, old_line, new_line))
 

	
 
                if not _newline_marker.match(line):
 
                    old_line += affects_old
 
                    new_line += affects_new
 
                    lines.append({
 
                        'old_lineno':   affects_old and old_line or '',
 
                        'new_lineno':   affects_new and new_line or '',
 
                        'action':       action,
 
                        'line':         line[1:],
 
                    })
 

	
 
                line = next(diff_lines)
 

	
 
                if _newline_marker.match(line):
 
                    # we need to append to lines, since this is not
 
                    # counted in the line specs of diff
 
                    lines.append({
 
                        'old_lineno':   '',
 
                        'new_lineno':   '',
 
                        'action':       'context',
 
                        'line':         line,
 
                    })
 
                    line = next(diff_lines)
 
            if old_line > old_end:
 
                raise Exception('error parsing diff - more than %s "-" lines at -%s+%s' % (old_end, old_line, new_line))
 
            if new_line > new_end:
 
                raise Exception('error parsing diff - more than %s "+" lines at -%s+%s' % (new_end, old_line, new_line))
 
    except StopIteration:
 
        pass
 
    if old_line != old_end or new_line != new_end:
 
        raise Exception('diff processing broken when old %s<>%s or new %s<>%s line %r' % (old_line, old_end, new_line, new_end, line))
 

	
 
    return chunks, added, deleted
 

	
 
# Used for inline highlighter word split, must match the substitutions in _escaper
 
_token_re = re.compile(r'()(&amp;|&lt;|&gt;|<u>\t</u>|<u class="cr"></u>| <i></i>|\W+?)')
 

	
 

	
 
def _highlight_inline_diff(old, new):
 
    """
 
    Highlight simple add/remove in two lines given as info dicts. They are
 
    modified in place and given markup with <del>/<ins>.
 
    """
 
    assert old['action'] == 'del'
 
    assert new['action'] == 'add'
 

	
 
    oldwords = _token_re.split(old['line'])
 
    newwords = _token_re.split(new['line'])
 
    sequence = difflib.SequenceMatcher(None, oldwords, newwords)
 

	
 
    oldfragments, newfragments = [], []
 
    for tag, i1, i2, j1, j2 in sequence.get_opcodes():
 
        oldfrag = ''.join(oldwords[i1:i2])
 
        newfrag = ''.join(newwords[j1:j2])
 
        if tag != 'equal':
 
            if oldfrag:
 
                oldfrag = '<del>%s</del>' % oldfrag
 
            if newfrag:
 
                newfrag = '<ins>%s</ins>' % newfrag
 
        oldfragments.append(oldfrag)
 
        newfragments.append(newfrag)
 

	
 
    old['line'] = "".join(oldfragments)
 
    new['line'] = "".join(newfragments)
kallithea/tests/fixtures/git_diff_quoting.diff
Show inline comments
 
new file 100644
 
diff --git "a/\"foo\"" "b/\"foo\""
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ "b/\"foo\""
 
@@ -0,0 +1 @@
 
+
 
diff --git a/'foo' b/'foo'
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ b/'foo'
 
@@ -0,0 +1 @@
 
+
 
diff --git "a/'foo'\"foo\"" "b/'foo'\"foo\""
 
new file mode 100644
 
index 0000000..8b13789
 
--- /dev/null
 
+++ "b/'foo'\"foo\""
 
@@ -0,0 +1 @@
 
+
 
diff --git "a/a\r\nb" "b/a\r\nb"
 
new file mode 100644
 
index 0000000..30d74d2
 
--- /dev/null
 
+++ "b/a\r\nb"
 
@@ -0,0 +1 @@
 
+test
 
\ No newline at end of file
 
diff --git "a/foo\rfoo" "b/foo\rfoo"
 
new file mode 100644
 
index 0000000..e69de29
 
diff --git a/foo bar b/foo bar
 
new file mode 100644
 
index 0000000..219ea2b
 
--- /dev/null
 
+++ b/foo bar	
 
@@ -0,0 +1 @@
 
+foo  bar
 
\ No newline at end of file
 
diff --git a/test b/test
 
new file mode 100644
 
index 0000000..9daeafb
 
--- /dev/null
 
+++ b/test
 
@@ -0,0 +1 @@
 
+test
 
diff --git "a/esc\033foo" "b/esc\033foo"
 
new file mode 100644
 
index 0000000..e69de29
 
diff --git "a/tab\tfoo" "b/tab\tfoo"
 
new file mode 100644
 
index 0000000..e69de29
kallithea/tests/models/test_diff_parsers.py
Show inline comments
 
@@ -79,236 +79,292 @@ DIFF_FIXTURES = {
 
    ],
 
    'hg_diff_binary_and_normal.diff': [
 
        ('img/baseline-10px.png', 'added',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {NEW_FILENODE: 'new file 100644',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('img/baseline-20px.png', 'removed',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {DEL_FILENODE: 'deleted file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('index.html', 'modified',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('js/global.js', 'removed',
 
         {'added': 0,
 
          'deleted': 75,
 
          'binary': False,
 
          'ops': {DEL_FILENODE: 'deleted file'}}),
 
        ('js/jquery/hashgrid.js', 'added',
 
         {'added': 340,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {NEW_FILENODE: 'new file 100755'}}),
 
        ('less/docs.less', 'modified',
 
         {'added': 34,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('less/scaffolding.less', 'modified',
 
         {'added': 1,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'modified',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_chmod.diff': [
 
        ('work-horus.xls', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}})
 
    ],
 
    'git_diff_rename_file.diff': [
 
        ('file.xls', 'renamed',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {RENAMED_FILENODE: 'file renamed from work-horus.xls to file.xls'}}),
 
        ('files/var/www/favicon.ico/DEFAULT',
 
         'renamed',
 
         {'added': 0,
 
          'binary': True,
 
          'deleted': 0,
 
          'ops': {4: 'file renamed from files/var/www/favicon.ico to files/var/www/favicon.ico/DEFAULT',
 
                  6: 'modified file chmod 100644 => 100755'}})
 
    ],
 
    'git_diff_mod_single_binary_file.diff': [
 
        ('US Warszawa.jpg', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'git_diff_binary_and_normal.diff': [
 
        ('img/baseline-10px.png', 'added',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {NEW_FILENODE: 'new file 100644',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('img/baseline-20px.png', 'removed',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {DEL_FILENODE: 'deleted file',
 
                  BIN_FILENODE: 'binary diff not shown'}}),
 
        ('index.html', 'modified',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('js/global.js', 'removed',
 
         {'added': 0,
 
          'deleted': 75,
 
          'binary': False,
 
          'ops': {DEL_FILENODE: 'deleted file'}}),
 
        ('js/jquery/hashgrid.js', 'added',
 
         {'added': 340,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {NEW_FILENODE: 'new file 100755'}}),
 
        ('less/docs.less', 'modified',
 
         {'added': 34,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('less/scaffolding.less', 'modified',
 
         {'added': 1,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('readme.markdown', 'modified',
 
         {'added': 1,
 
          'deleted': 10,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'diff_with_diff_data.diff': [
 
        ('vcs/backends/base.py', 'modified',
 
         {'added': 18,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/git/repository.py', 'modified',
 
         {'added': 46,
 
          'deleted': 15,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/backends/hg.py', 'modified',
 
         {'added': 22,
 
          'deleted': 3,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_git.py', 'modified',
 
         {'added': 5,
 
          'deleted': 5,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
        ('vcs/tests/test_repository.py', 'modified',
 
         {'added': 174,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'git_diff_modify_binary_file.diff': [
 
        ('file.name', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {MOD_FILENODE: 'modified file',
 
                  BIN_FILENODE: 'binary diff not shown'}})
 
    ],
 
    'hg_diff_copy_file.diff': [
 
        ('file2', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file1 to file2'}}),
 
    ],
 
    'hg_diff_copy_and_modify_file.diff': [
 
        ('file3', 'modified',
 
         {'added': 1,
 
          'deleted': 0,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file2 to file3',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_copy_and_chmod_file.diff': [
 
        ('file4', 'modified',
 
         {'added': 0,
 
          'deleted': 0,
 
          'binary': True,
 
          'ops': {COPIED_FILENODE: 'file copied from file3 to file4',
 
                  CHMOD_FILENODE: 'modified file chmod 100644 => 100755'}}),
 
    ],
 
    'hg_diff_copy_chmod_and_edit_file.diff': [
 
        ('file5', 'modified',
 
         {'added': 2,
 
          'deleted': 1,
 
          'binary': False,
 
          'ops': {COPIED_FILENODE: 'file copied from file4 to file5',
 
                  CHMOD_FILENODE: 'modified file chmod 100755 => 100644',
 
                  MOD_FILENODE: 'modified file'}}),
 
    ],
 
    'hg_diff_rename_space_cr.diff': [
 
        ('oh yes', 'renamed',
 
         {'added': 3,
 
          'deleted': 2,
 
          'binary': False,
 
          'ops': {RENAMED_FILENODE: 'file renamed from oh no to oh yes'}}),
 
    ],
 
    'git_diff_quoting.diff': [
 
        (r'\"foo\"',  # TODO: quotes should not be escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ("'foo'",
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ("'foo'" r'\"foo\"',  # TODO: quotes should not be escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'a\r\nb',  # TODO: escaped
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'foo\rfoo',  # TODO: escaped
 
         'added',
 
        {'added': 0,
 
         'binary': True,
 
         'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ('foo bar',
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        ('test',
 
         'added',
 
         {'added': 1,
 
          'binary': False,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'esc\033foo',  # TODO: escaped
 
         'added',
 
         {'added': 0,
 
          'binary': True,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
        (r'tab\tfoo',  # TODO: escaped
 
         'added',
 
         {'added': 0,
 
          'binary': True,
 
          'deleted': 0,
 
          'ops': {1: 'new file 100644'}}),
 
    ],
 
}
 

	
 

	
 
class TestDiffLib(base.TestController):
    """Tests of DiffProcessor parsing, driven by the diff fixture files."""

    @base.parametrize('diff_fixture', DIFF_FIXTURES)
    def test_diff(self, diff_fixture):
        """
        Parse one fixture and compare filename, operation and stats of every
        file in it with the expectation in DIFF_FIXTURES.
        """
        # Fixtures named "git_*" are parsed as Git diffs, all others as hg.
        vcs = 'git' if diff_fixture.startswith('git_') else 'hg'
        raw_diff = fixture.load_resource(diff_fixture, strip=False)
        parsed = DiffProcessor(raw_diff, vcs=vcs).parsed
        data = [(entry['filename'], entry['operation'], entry['stats']) for entry in parsed]
        assert DIFF_FIXTURES[diff_fixture] == data

    def test_diff_markup(self):
        """
        Check the whitespace markup and inline highlighting produced for the
        second chunk of the first file in markuptest.diff.
        """
        raw_diff = fixture.load_resource('markuptest.diff', strip=False)
        chunks = DiffProcessor(raw_diff).parsed[0]['chunks']
        assert not chunks[0]
        row_format = '%(action)-7s %(new_lineno)3s %(old_lineno)3s %(line)r\n'
        s = '\n' + ''.join(row_format % row for row in chunks[1])
        assert s == r'''
context         '@@ -51,6 +51,13 @@'
unmod    51  51 '<u>\t</u>begin();'
unmod    52  52 '<u>\t</u><i></i>'
add      53     '<u>\t</u>int foo;<u class="cr"></u>'
add      54     '<u>\t</u>int bar; <u class="cr"></u>'
add      55     '<u>\t</u>int baz;<u>\t</u><u class="cr"></u>'
add      56     '<u>\t</u>int space; <i></i>'
add      57     '<u>\t</u>int tab;<u>\t</u><i></i>'
add      58     '<u>\t</u><i></i>'
unmod    59  53 ' <i></i>'
del          54 '<u>\t</u>#define MAX_STEPS (48)'
add      60     '<u>\t</u><u class="cr"></u>'
add      61     '<u>\t</u>#define MAX_STEPS (64)<u class="cr"></u>'
unmod    62  55 ''
del          56 '<u>\t</u>#define MIN_STEPS (<del>48</del>)'
add      63     '<u>\t</u>#define MIN_STEPS (<ins>42</ins>)'
'''
0 comments (0 inline, 0 general)