Changeset - 99ffd118f6af
[Not reviewed]
default
0 1 0
Mads Kiilerich (mads) - 5 years ago 2021-02-16 22:54:27
mads@kiilerich.com
diffs: DiffParser should only HTML escape lines when generating HTML

Move the diff line HTML escaping out of the header parsing in _get_header and into the HTML branch of DiffProcessor, so it is only applied when generating HTML.
1 file changed with 36 insertions and 36 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/diffs.py
Show inline comments
 
@@ -399,15 +399,17 @@ class DiffProcessor(object):
 
                'stats':            stats,
 
            })
 

	
 
        if not html:
 
            return _files
 

	
 
        # highlight inline changes when one del is followed by one add
 
        for diff_data in _files:
 
            for chunk in diff_data['chunks']:
 
                for change in chunk:
 
                    change['line'] = _escaper(change['line'])
 
                # highlight inline changes when one del is followed by one add
 
                lineiter = iter(chunk)
 
                try:
 
                    peekline = next(lineiter)
 
                    while True:
 
                        # find a first del line
 
                        while peekline['action'] != 'del':
 
@@ -444,50 +446,50 @@ class DiffProcessor(object):
 

	
 
# Pattern used by _escaper to find everything in a diff line that needs HTML
# escaping or markup. The group order matters because the substitution
# callback dispatches on group index:
#   1: '&'   2: '<'   3: '>'
#   4: a tab, with 5: an (empty) end-of-line match when the tab is last
#   6: a carriage return
#   7: a single space at the end of the line
_escape_re = re.compile(r'(&)|(<)|(>)|(\t)($)?|(\r)|( $)')
 

	
 

	
 
def _escaper(diff_line):
 
    r"""
 
    Do HTML escaping/markup of a single diff line (including first +/- column)
 
    Do HTML escaping/markup of a single diff line (excluding first +/- column)
 

	
 
    >>> _escaper('foobar')
 
    'foobar'
 
    >>> _escaper('@foo & bar')
 
    '@foo &amp; bar'
 
    >>> _escaper('+foo < bar')
 
    '+foo &lt; bar'
 
    >>> _escaper('-foo > bar')
 
    '-foo &gt; bar'
 
    >>> _escaper(' <foo>')
 
    ' &lt;foo&gt;'
 
    >>> _escaper(' foo\tbar')
 
    ' foo<u>\t</u>bar'
 
    >>> _escaper(' foo\rbar\r')
 
    ' foo<u class="cr"></u>bar<u class="cr"></u>'
 
    >>> _escaper(' foo\t')
 
    ' foo<u>\t</u><i></i>'
 
    >>> _escaper(' foo ')
 
    ' foo <i></i>'
 
    >>> _escaper(' foo  ')
 
    ' foo  <i></i>'
 
    >>> _escaper('foo < bar')
 
    'foo &lt; bar'
 
    >>> _escaper('foo > bar')
 
    'foo &gt; bar'
 
    >>> _escaper('<foo>')
 
    '&lt;foo&gt;'
 
    >>> _escaper('foo\tbar')
 
    'foo<u>\t</u>bar'
 
    >>> _escaper('foo\rbar\r')
 
    'foo<u class="cr"></u>bar<u class="cr"></u>'
 
    >>> _escaper('foo\t')
 
    'foo<u>\t</u><i></i>'
 
    >>> _escaper('foo ')
 
    'foo <i></i>'
 
    >>> _escaper('foo  ')
 
    'foo  <i></i>'
 
    >>> _escaper('')
 
    ''
 
    >>> _escaper(' ')
 
    ' '
 
    >>> _escaper('  ')
 
    '  <i></i>'
 
    >>> _escaper(' \t')
 
    ' <u>\t</u><i></i>'
 
    >>> _escaper(' \t  ')
 
    ' <u>\t</u>  <i></i>'
 
    >>> _escaper('   \t')
 
    '   <u>\t</u><i></i>'
 
    >>> _escaper(' \t\t  ')
 
    ' <u>\t</u><u>\t</u>  <i></i>'
 
    >>> _escaper('   \t\t')
 
    '   <u>\t</u><u>\t</u><i></i>'
 
    >>> _escaper(' foo&bar<baz>  ')
 
    ' foo&amp;bar&lt;baz&gt;  <i></i>'
 
    ' <i></i>'
 
    >>> _escaper('\t')
 
    '<u>\t</u><i></i>'
 
    >>> _escaper('\t  ')
 
    '<u>\t</u>  <i></i>'
 
    >>> _escaper('  \t')
 
    '  <u>\t</u><i></i>'
 
    >>> _escaper('\t\t  ')
 
    '<u>\t</u><u>\t</u>  <i></i>'
 
    >>> _escaper('  \t\t')
 
    '  <u>\t</u><u>\t</u><i></i>'
 
    >>> _escaper('foo&bar<baz>  ')
 
    'foo&amp;bar&lt;baz&gt;  <i></i>'
 
    """
 

	
 
    def substitute(m):
 
        groups = m.groups()
 
        if groups[0]:
 
            return '&amp;'
 
@@ -499,14 +501,12 @@ def _escaper(diff_line):
 
            if groups[4] is not None:  # end of line
 
                return '<u>\t</u><i></i>'
 
            return '<u>\t</u>'
 
        if groups[5]:
 
            return '<u class="cr"></u>'
 
        if groups[6]:
 
            if m.start() == 0:
 
                return ' '  # first column space shouldn't make empty lines show up as trailing space
 
            return ' <i></i>'
 
        assert False
 

	
 
    return _escape_re.sub(substitute, diff_line)
 

	
 

	
 
@@ -591,13 +591,13 @@ def _get_header(vcs, diff_chunk):
 
    if rest:
 
        if _header_next_check.match(rest):
 
            raise Exception('cannot parse %s diff header: %r followed by %r' % (vcs, safe_str(bytes(diff_chunk[:match.end()])), safe_str(bytes(rest[:1000]))))
 
        if rest[-1:] != b'\n':
 
            # The diff will generally already have trailing \n (and be a memoryview). It might also be huge so we don't want to allocate it twice. But in this very rare case, we don't care.
 
            rest = bytes(rest) + b'\n'
 
    diff_lines = (_escaper(safe_str(m.group(1))) for m in re.finditer(br'(.*)\n', rest))
 
    diff_lines = (safe_str(m.group(1)) for m in re.finditer(br'(.*)\n', rest))
 
    return meta_info, diff_lines
 

	
 

	
 
# Matches a hunk header line of the form "@@ -A[,B] +C[,D] @@ rest".
# Groups 1-4 capture the numbers A, B, C and D (B and D are optional and are
# None when absent); group 5 captures whatever follows the closing "@@".
_chunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)')
 
# Matches a line that starts with a literal backslash followed by
# " No newline at end of file".
_newline_marker = re.compile(r'^\\ No newline at end of file')
 

	
0 comments (0 inline, 0 general)