Changeset - 0383ed91d4ed
[Not reviewed]
default
0 3 0
Mads Kiilerich (mads) - 5 years ago 2020-11-09 16:42:43
mads@kiilerich.com
Grafted from: 788b5492ab35
lib: move url_re to webutils
3 files changed with 6 insertions and 7 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/helpers.py
Show inline comments
 
@@ -15,61 +15,60 @@
 
Helper functions
 

	
 
Consists of functions to typically be used within templates, but also
 
available to Controllers. This module is available to both as 'h'.
 
"""
 
import hashlib
 
import logging
 
import re
 
import textwrap
 
import urllib.parse
 

	
 
from beaker.cache import cache_region
 
from pygments import highlight as code_highlight
 
from pygments.formatters.html import HtmlFormatter
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 

	
 
import kallithea
 
from kallithea.lib.annotate import annotate_highlight
 
#==============================================================================
 
# PERMS
 
#==============================================================================
 
from kallithea.lib.auth import HasPermissionAny, HasRepoGroupPermissionLevel, HasRepoPermissionLevel
 
from kallithea.lib.diffs import BIN_FILENODE, CHMOD_FILENODE, DEL_FILENODE, MOD_FILENODE, NEW_FILENODE, RENAMED_FILENODE
 
from kallithea.lib.markup_renderer import url_re
 
from kallithea.lib.pygmentsutils import get_custom_lexer
 
from kallithea.lib.utils2 import (AttributeDict, age, asbool, credentials_filter, fmt_date, link_to_ref, safe_bytes, safe_int, safe_str, shorter,
 
                                  time_to_datetime)
 
from kallithea.lib.vcs.backends.base import BaseChangeset, EmptyChangeset
 
from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError
 
#==============================================================================
 
# SCM FILTERS available via h.
 
#==============================================================================
 
from kallithea.lib.vcs.utils import author_email, author_name
 
from kallithea.lib.webutils import (HTML, MENTIONS_REGEX, Option, canonical_url, checkbox, chop_at, end_form, escape, form, format_byte_size, hidden,
 
                                    html_escape, js, jshtml, link_to, literal, password, pop_flash_messages, radio, reset, safeid, select,
 
                                    session_csrf_secret_name, session_csrf_secret_token, submit, text, textarea, truncate, url, wrap_paragraphs)
 
                                    session_csrf_secret_name, session_csrf_secret_token, submit, text, textarea, truncate, url, url_re, wrap_paragraphs)
 
from kallithea.model import db
 
from kallithea.model.changeset_status import ChangesetStatusModel
 

	
 

	
 
# mute pyflakes "imported but unused"
 
# from webutils
 
assert HTML
 
assert Option
 
assert canonical_url
 
assert checkbox
 
assert chop_at
 
assert end_form
 
assert form
 
assert format_byte_size
 
assert hidden
 
assert js
 
assert jshtml
 
assert password
 
assert pop_flash_messages
 
assert radio
 
assert reset
 
assert safeid
 
assert select
 
assert session_csrf_secret_name
kallithea/lib/markup_renderer.py
Show inline comments
 
@@ -21,52 +21,48 @@ This file was forked by the Kallithea pr
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Oct 27, 2011
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import hashlib
 
import logging
 
import re
 
import traceback
 

	
 
import bleach
 
import markdown as markdown_mod
 
from docutils.core import publish_parts
 
from docutils.parsers.rst import directives
 

	
 
from kallithea.lib import webutils
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
url_re = re.compile(r'''\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
 
                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:;?!*()-]*[/a-zA-Z0-9_=@#~])?''')
 

	
 

	
 
class MarkupRenderer(object):
 
    RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
 

	
 
    MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
 
    RST_PAT = re.compile(r're?st', re.IGNORECASE)
 
    PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)
 

	
 
    @classmethod
 
    def _detect_renderer(cls, source, filename):
 
        """
 
        runs detection of what renderer should be used for generating html
 
        from a markup language
 

	
 
        filename can be also explicitly a renderer name
 
        """
 

	
 
        if cls.MARKDOWN_PAT.findall(filename):
 
            return cls.markdown
 
        elif cls.RST_PAT.findall(filename):
 
            return cls.rst
 
        elif cls.PLAIN_PAT.findall(filename):
 
            return cls.rst
 
        return cls.plain
 

	
 
@@ -142,49 +138,49 @@ class MarkupRenderer(object):
 
        return bleach.clean(readme_data,
 
            tags=['a', 'abbr', 'b', 'blockquote', 'br', 'code', 'dd',
 
                  'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5',
 
                  'h6', 'hr', 'i', 'img', 'li', 'ol', 'p', 'pre', 'span',
 
                  'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'th',
 
                  'thead', 'tr', 'ul'],
 
            attributes=['class', 'id', 'style', 'label', 'title', 'alt', 'href', 'src'],
 
            styles=['color'],
 
            protocols=['http', 'https', 'mailto'],
 
            )
 

	
 
    @classmethod
 
    def plain(cls, source, universal_newline=True):
 
        """
 
        >>> MarkupRenderer.plain('https://example.com/')
 
        '<br /><a href="https://example.com/">https://example.com/</a>'
 
        """
 
        if universal_newline:
 
            newline = '\n'
 
            source = newline.join(source.splitlines())
 

	
 
        def url_func(match_obj):
 
            url_full = match_obj.group(0)
 
            return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})
 
        source = url_re.sub(url_func, source)
 
        source = webutils.url_re.sub(url_func, source)
 
        return '<br />' + source.replace("\n", '<br />')
 

	
 
    @classmethod
 
    def markdown(cls, source, safe=True, flavored=False):
 
        """
 
        Convert Markdown (possibly GitHub Flavored) to INSECURE HTML, possibly
 
        with "safe" fall-back to plaintext. Output from this method should be sanitized before use.
 

	
 
        >>> MarkupRenderer.markdown('''<img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg">''')
 
        '<p><img id="a" style="margin-top:-1000px;color:red" src="http://example.com/test.jpg"></p>'
 
        >>> MarkupRenderer.markdown('''<img class="c d" src="file://localhost/test.jpg">''')
 
        '<p><img class="c d" src="file://localhost/test.jpg"></p>'
 
        >>> MarkupRenderer.markdown('''<a href="foo">foo</a>''')
 
        '<p><a href="foo">foo</a></p>'
 
        >>> MarkupRenderer.markdown('''<script>alert(1)</script>''')
 
        '<script>alert(1)</script>'
 
        >>> MarkupRenderer.markdown('''<div onclick="alert(2)">yo</div>''')
 
        '<div onclick="alert(2)">yo</div>'
 
        >>> MarkupRenderer.markdown('''<a href="javascript:alert(3)">yo</a>''')
 
        '<p><a href="javascript:alert(3)">yo</a></p>'
 
        >>> MarkupRenderer.markdown('''## Foo''')
 
        '<h2>Foo</h2>'
 
        >>> print(MarkupRenderer.markdown('''
 
        ...     #!/bin/bash
kallithea/lib/webutils.py
Show inline comments
 
@@ -288,38 +288,42 @@ def js(value):
 
    """
 
    return literal(
 
        ('(' + json.dumps(value) + ')')
 
        # In JSON, the following can only appear in string literals.
 
        .replace('&', r'\x26')
 
        .replace('<', r'\x3c')
 
        .replace('>', r'\x3e')
 
    )
 

	
 

	
 
def jshtml(val):
 
    """HTML escapes a string value, then converts the resulting string
 
    to its corresponding JavaScript representation (see `js`).
 

	
 
    This is used when a plain-text string (possibly containing special
 
    HTML characters) will be used by a script in an HTML context (e.g.
 
    element.innerHTML or jQuery's 'html' method).
 

	
 
    If in doubt, err on the side of using `jshtml` over `js`, since it's
 
    better to escape too much than too little.
 
    """
 
    return js(escape(val))
 

	
 

	
 
url_re = re.compile(r'''\bhttps?://(?:[\da-zA-Z0-9@:.-]+)'''
 
                    r'''(?:[/a-zA-Z0-9_=@#~&+%.,:;?!*()-]*[/a-zA-Z0-9_=@#~])?''')
 

	
 

	
 
# Must match regexp in kallithea/public/js/base.js MentionsAutoComplete()
 
# Check char before @ - it must not look like we are in an email addresses.
 
# Matching is greedy so we don't have to look beyond the end.
 
MENTIONS_REGEX = re.compile(r'(?:^|(?<=[^a-zA-Z0-9]))@([a-zA-Z0-9][-_.a-zA-Z0-9]*[a-zA-Z0-9])')
 

	
 

	
 
def extract_mentioned_usernames(text):
 
    r"""
 
    Returns list of (possible) usernames @mentioned in given text.
 

	
 
    >>> extract_mentioned_usernames('@1-2.a_X,@1234 not@not @ddd@not @n @ee @ff @gg, @gg;@hh @n\n@zz,')
 
    ['1-2.a_X', '1234', 'ddd', 'ee', 'ff', 'gg', 'gg', 'hh', 'zz']
 
    """
 
    return MENTIONS_REGEX.findall(text)
0 comments (0 inline, 0 general)