Changeset - 8993d401575b
[Not reviewed]
stable
0 1 0
Mads Kiilerich (mads) - 4 years ago 2022-02-07 19:07:08
mads@kiilerich.com
Grafted from: e7930d122a44
files: fix raw download of repo files with names with unicode points above 256 in name

Raw download had apparently only been tested with non-ascii characters that
were latin1. That was apparently a (too) simple case that worked without
crashing.

Files with unicode code points above 256 in their name would fail to download,
when Waitress failed like this, trying to get a real byte string by encoding
WSGI headers to latin1:
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 84-85: ordinal not in range(256)

HTTP headers are of course byte strings on the network, but Python3 WSGI does
unfortunately neither expose it as bytes nor as unicode strings to be encoded
as utf-8. Instead, it uses unicode strings with byte values encoded as code
points 0-255. That is achieved by decoding the utf-8 encoded bytes as latin1.

For raw downloads, the recommended download filename is provided in the
Content-Disposition header. The problem is that it was provided as a real
unicode string.

Fixed by applying the "proper" latin1-decoding of a utf8-encoding.
1 file changed with 2 insertions and 2 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/files.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.files
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
Files controller for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Apr 21, 2010
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import logging
 
import os
 
import posixpath
 
import shutil
 
import tempfile
 
import traceback
 
from collections import OrderedDict
 

	
 
from tg import request, response
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPFound, HTTPNotFound
 

	
 
import kallithea
 
import kallithea.lib.helpers as h
 
from kallithea.controllers import base
 
from kallithea.lib import diffs, webutils
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.exceptions import NonRelativePathError
 
from kallithea.lib.utils2 import asbool, convert_line_endings, detect_mode, safe_str
 
from kallithea.lib.utils2 import asbool, convert_line_endings, detect_mode, safe_bytes, safe_str
 
from kallithea.lib.vcs.backends.base import EmptyChangeset
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (ChangesetDoesNotExistError, ChangesetError, EmptyRepositoryError, ImproperArchiveTypeError, NodeAlreadyExistsError,
 
                                          NodeDoesNotExistError, NodeError, RepositoryError, VCSError)
 
from kallithea.lib.vcs.nodes import FileNode
 
from kallithea.lib.vcs.utils import author_email
 
from kallithea.lib.webutils import url
 
from kallithea.model import userlog
 
from kallithea.model.repo import RepoModel
 
from kallithea.model.scm import ScmModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class FilesController(base.BaseRepoController):
 

	
 
    def _before(self, *args, **kwargs):
 
        super(FilesController, self)._before(*args, **kwargs)
 

	
 
    def __get_cs(self, rev, silent_empty=False):
 
        """
 
        Safe way to get changeset if error occur it redirects to tip with
 
        proper message
 

	
 
        :param rev: revision to fetch
 
        :silent_empty: return None if repository is empty
 
        """
 

	
 
        try:
 
            return c.db_repo_scm_instance.get_changeset(rev)
 
        except EmptyRepositoryError as e:
 
            if silent_empty:
 
                return None
 
            url_ = url('files_add_home',
 
                       repo_name=c.repo_name,
 
                       revision=0, f_path='', anchor='edit')
 
            add_new = webutils.link_to(_('Click here to add new file'), url_, class_="alert-link")
 
            webutils.flash(_('There are no files yet.') + ' ' + add_new, category='warning')
 
            raise HTTPNotFound()
 
        except (ChangesetDoesNotExistError, LookupError):
 
            msg = _('Such revision does not exist for this repository')
 
            webutils.flash(msg, category='error')
 
            raise HTTPNotFound()
 
        except RepositoryError as e:
 
            webutils.flash(e, category='error')
 
            raise HTTPNotFound()
 

	
 
@@ -188,97 +188,97 @@ class FilesController(base.BaseRepoContr
 
        if c.db_repo_scm_instance.closed_branches:
 
            prefix = _('(closed)') + ' '
 
            c.revision_options += [('-', '-')] + \
 
                [(n, prefix + b) for b, n in c.db_repo_scm_instance.closed_branches.items()]
 

	
 
        return base.render('files/files.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    @base.jsonify
 
    def history(self, repo_name, revision, f_path):
 
        changeset = self.__get_cs(revision)
 
        _file = changeset.get_node(f_path)
 
        if _file.is_file():
 
            file_history, _hist = self._get_node_history(changeset, f_path)
 

	
 
            res = []
 
            for obj in file_history:
 
                res.append({
 
                    'text': obj[1],
 
                    'children': [{'id': o[0], 'text': o[1]} for o in obj[0]]
 
                })
 

	
 
            data = {
 
                'more': False,
 
                'results': res
 
            }
 
            return data
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def authors(self, repo_name, revision, f_path):
 
        changeset = self.__get_cs(revision)
 
        _file = changeset.get_node(f_path)
 
        if _file.is_file():
 
            file_history, _hist = self._get_node_history(changeset, f_path)
 
            c.authors = []
 
            for a in set([x.author for x in _hist]):
 
                c.authors.append((author_email(a), h.person(a)))
 
            return base.render('files/files_history_box.html')
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def rawfile(self, repo_name, revision, f_path):
 
        cs = self.__get_cs(revision)
 
        file_node = self.__get_filenode(cs, f_path)
 

	
 
        response.content_disposition = \
 
            'attachment; filename=%s' % f_path.split(kallithea.URL_SEP)[-1]
 
            'attachment; filename=%s' % safe_bytes(f_path.split(kallithea.URL_SEP)[-1]).decode('latin1')
 

	
 
        response.content_type = file_node.mimetype
 
        return file_node.content
 

	
 
    @LoginRequired(allow_default_user=True)
 
    @HasRepoPermissionLevelDecorator('read')
 
    def raw(self, repo_name, revision, f_path):
 
        cs = self.__get_cs(revision)
 
        file_node = self.__get_filenode(cs, f_path)
 

	
 
        raw_mimetype_mapping = {
 
            # map original mimetype to a mimetype used for "show as raw"
 
            # you can also provide a content-disposition to override the
 
            # default "attachment" disposition.
 
            # orig_type: (new_type, new_dispo)
 

	
 
            # show images inline:
 
            'image/x-icon': ('image/x-icon', 'inline'),
 
            'image/png': ('image/png', 'inline'),
 
            'image/gif': ('image/gif', 'inline'),
 
            'image/jpeg': ('image/jpeg', 'inline'),
 
            'image/svg+xml': ('image/svg+xml', 'inline'),
 
        }
 

	
 
        mimetype = file_node.mimetype
 
        try:
 
            mimetype, dispo = raw_mimetype_mapping[mimetype]
 
        except KeyError:
 
            # we don't know anything special about this, handle it safely
 
            if file_node.is_binary:
 
                # do same as download raw for binary files
 
                mimetype, dispo = 'application/octet-stream', 'attachment'
 
            else:
 
                # do not just use the original mimetype, but force text/plain,
 
                # otherwise it would serve text/html and that might be unsafe.
 
                # Note: underlying vcs library fakes text/plain mimetype if the
 
                # mimetype can not be determined and it thinks it is not
 
                # binary.This might lead to erroneous text display in some
 
                # cases, but helps in other cases, like with text files
 
                # without extension.
 
                mimetype, dispo = 'text/plain', 'inline'
 

	
 
        if dispo == 'attachment':
 
            dispo = 'attachment; filename=%s' % f_path.split(os.sep)[-1]
 

	
 
        response.content_disposition = dispo
 
        response.content_type = mimetype
 
        return file_node.content
0 comments (0 inline, 0 general)