Changeset - 16a359ce1801
[Not reviewed]
default
0 5 0
Mads Kiilerich (mads) - 5 years ago 2020-10-11 11:52:22
mads@kiilerich.com
Grafted from: 8b60096d8e92
vcs: move changeset diff from controller to vcs

Remove unfortunate model dependency on controller ... and put the VCS details
where they belong, with less VCS specific knowledge in the controllers.
5 files changed with 127 insertions and 111 deletions:
0 comments (0 inline, 0 general)
kallithea/controllers/compare.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.controllers.compare
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
compare controller showing differences between two
 
repos, branches, bookmarks or tips
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: May 6, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 

	
 
import logging
 
import re
 

	
 
import mercurial.unionrepo
 
from tg import request
 
from tg import tmpl_context as c
 
from tg.i18n import ugettext as _
 
from webob.exc import HTTPBadRequest, HTTPFound, HTTPNotFound
 

	
 
from kallithea.config.routing import url
 
from kallithea.lib import diffs
 
from kallithea.lib import helpers as h
 
from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 
from kallithea.lib.base import BaseRepoController, render
 
from kallithea.lib.graphmod import graph_data
 
from kallithea.lib.utils2 import ascii_bytes, ascii_str, safe_bytes
 
from kallithea.model.db import Repository
 

	
 

	
 
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
 

	
 

	
 
class CompareController(BaseRepoController):
 

	
 
    def _before(self, *args, **kwargs):
        """
        Resolve the two repositories taking part in the comparison.

        ``c.a_repo`` is set to the already loaded base repository and
        ``c.cs_repo`` to the repository named by the ``other_repo`` GET
        parameter (the base repository when the parameter is absent).

        :raises HTTPFound: redirect to the compare home page, after flashing
            an error, when the other repository cannot be found or when the
            two repositories use different VCS backends
        """
        super(CompareController, self)._before(*args, **kwargs)

        def redirect_home(msg):
            # Report the problem and send the user back to the compare home page.
            h.flash(msg, category='error')
            raise HTTPFound(location=url('compare_home', repo_name=c.a_repo.repo_name))

        # The base repository has already been retrieved.
        c.a_repo = c.db_repo

        # Retrieve the "changeset" repository (default: same as base).
        other_repo_name = request.GET.get('other_repo', None)
        if other_repo_name is not None:
            c.cs_repo = Repository.get_by_repo_name(other_repo_name)
            if c.cs_repo is None:
                redirect_home(_('Could not find other repository %s') % other_repo_name)
        else:
            c.cs_repo = c.a_repo

        # Verify that it's even possible to compare these two repositories.
        if c.a_repo.scm_instance.alias != c.cs_repo.scm_instance.alias:
            redirect_home(_('Cannot compare repositories of different types'))
 

	
 
    @staticmethod
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from org_repo@org_rev
        to other_repo@other_rev
        ... and the other way
        ... and the ancestors that would be used for merge

        :param alias: backend alias of the repositories, ``'hg'`` or ``'git'``
        :param org_repo: repo object, that is most likely the original repo we forked from
        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want our compare to be made on other_repo
        :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
            ``ancestors`` is ``None`` when both revisions are identical and
            ``org_changesets`` is always empty for git
        :raises Exception: if ``alias`` is neither ``'hg'`` nor ``'git'``
        """
        ancestors = None
        if org_rev == other_rev:
            org_changesets = []
            other_changesets = []

        elif alias == 'hg':
            # case two independent repos
            if org_repo != other_repo:
                hgrepo = mercurial.unionrepo.makeunionrepository(other_repo.baseui,
                                                       safe_bytes(other_repo.path),
                                                       safe_bytes(org_repo.path))
                # all ancestors of other_rev will be in other_repo and
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot

            # no remote compare do it on the same repository
            else:
                hgrepo = other_repo._repo

            ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
                         hgrepo.revs(b"id(%s) & ::id(%s)", ascii_bytes(other_rev), ascii_bytes(org_rev))]
            if ancestors:
                log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev)
            else:
                log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev)
                ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
                             hgrepo.revs(b"heads(::id(%s) & ::id(%s))", ascii_bytes(org_rev), ascii_bytes(other_rev))] # FIXME: expensive!

            other_changesets = [
                other_repo.get_changeset(rev)
                for rev in hgrepo.revs(
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                    ascii_bytes(other_rev), ascii_bytes(org_rev), ascii_bytes(org_rev))
            ]
            org_changesets = [
                org_repo.get_changeset(ascii_str(hgrepo[rev].hex()))
                for rev in hgrepo.revs(
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                    ascii_bytes(org_rev), ascii_bytes(other_rev), ascii_bytes(other_rev))
            ]

        elif alias == 'git':
            if org_repo != other_repo:
                from dulwich.client import SubprocessGitClient
                from dulwich.repo import Repo

                gitrepo = Repo(org_repo.path)
                gitrepo_remote = None
                try:
                    # Fetch in both directions so each repository knows the
                    # objects of the other one.
                    SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)

                    gitrepo_remote = Repo(other_repo.path)
                    SubprocessGitClient(thin_packs=False).fetch(org_repo.path, gitrepo_remote)

                    revs = [
                        ascii_str(x.commit.id)
                        for x in gitrepo_remote.get_walker(include=[ascii_bytes(other_rev)],
                                                           exclude=[ascii_bytes(org_rev)])
                    ]
                    other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
                    if other_changesets:
                        ancestors = [other_changesets[0].parents[0].raw_id]
                    else:
                        # no changesets from other repo, ancestor is the other_rev
                        ancestors = [other_rev]
                finally:
                    # Always release the dulwich handles, also when fetching,
                    # walking or changeset lookup fails.
                    gitrepo.close()
                    if gitrepo_remote is not None:
                        gitrepo_remote.close()

            else:
                so = org_repo.run_git_command(
                    ['log', '--reverse', '--pretty=format:%H',
                     '-s', '%s..%s' % (org_rev, other_rev)]
                )
                other_changesets = [org_repo.get_changeset(cs)
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
                so = org_repo.run_git_command(
                    ['merge-base', org_rev, other_rev]
                )
                ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]
            org_changesets = []

        else:
            raise Exception('Bad alias only git and hg is allowed')

        return other_changesets, org_changesets, ancestors
 

	
 
    @LoginRequired(allow_default_user=True)
    @HasRepoPermissionLevelDecorator('read')
    def index(self, repo_name):
        """
        Render the compare home page, with no references selected yet.
        """
        c.compare_home = True
        # Nothing has been chosen for comparison on the home page.
        c.a_ref_name = None
        c.cs_ref_name = None
        return render('compare/compare_diff.html')
 

	
 
    @LoginRequired(allow_default_user=True)
    @HasRepoPermissionLevelDecorator('read')
    def compare(self, repo_name, org_ref_type, org_ref_name, other_ref_type, other_ref_name):
        """
        Show the changesets and the diff between ``org_ref_name`` in the base
        repository and ``other_ref_name`` in the other repository.

        Behaviour is tuned by request data:

        - GET ``merge``: diff from the common ancestor instead of a raw diff
        - GET ``fulldiff``: disable the diff size cut-off limit
        - header ``X-Partial-XHR``: only render the changeset list
        - GET ``is_ajax_preview``: (partial only) flag stored for the template

        :raises HTTPNotFound: merge compare without changesets or without
            exactly one merge ancestor
        :raises HTTPBadRequest: non-merge compare across two repositories
        """
        org_ref_name = org_ref_name.strip()
        other_ref_name = other_ref_name.strip()

        # If merge is True:
        #   Show what org would get if merged with other:
        #   List changesets that are ancestors of other but not of org.
        #   New changesets in org is thus ignored.
        #   Diff will be from common ancestor, and merges of org to other will thus be ignored.
        # If merge is False:
        #   Make a raw diff from org to other, no matter if related or not.
        #   Changesets in one and not in the other will be ignored
        merge = bool(request.GET.get('merge'))
        # fulldiff disables cut_off_limit
        fulldiff = request.GET.get('fulldiff')
        # partial uses compare_cs.html template directly
        partial = request.environ.get('HTTP_X_PARTIAL_XHR')
        # is_ajax_preview puts hidden input field with changeset revisions
        c.is_ajax_preview = partial and request.GET.get('is_ajax_preview')
        # swap url for compare_diff page - never partial and never is_ajax_preview
        c.swap_url = h.url('compare_url',
            repo_name=c.cs_repo.repo_name,
            org_ref_type=other_ref_type, org_ref_name=other_ref_name,
            other_repo=c.a_repo.repo_name,
            other_ref_type=org_ref_type, other_ref_name=org_ref_name,
            merge=merge or '')
        ignore_whitespace_diff = h.get_ignore_whitespace_diff(request.GET)
        diff_context_size = h.get_diff_context_size(request.GET)

        c.a_rev = self._get_ref_rev(c.a_repo, org_ref_type, org_ref_name,
            returnempty=True)
        c.cs_rev = self._get_ref_rev(c.cs_repo, other_ref_type, other_ref_name)

        c.compare_home = False
        c.a_ref_name = org_ref_name
        c.a_ref_type = org_ref_type
        c.cs_ref_name = other_ref_name
        c.cs_ref_type = other_ref_type

        # Let the VCS backend of the base repository compute the changesets
        # and merge ancestors - once, instead of computing them twice and
        # throwing the first result away.
        c.cs_ranges, c.cs_ranges_org, c.ancestors = c.a_repo.scm_instance.get_diff_changesets(
            c.a_rev, c.cs_repo.scm_instance, c.cs_rev)
        raw_ids = [x.raw_id for x in c.cs_ranges]
        c.cs_comments = c.cs_repo.get_comments(raw_ids)
        c.cs_statuses = c.cs_repo.statuses(raw_ids)

        revs = [ctx.revision for ctx in reversed(c.cs_ranges)]
        c.jsdata = graph_data(c.cs_repo.scm_instance, revs)

        if partial:
            return render('compare/compare_cs.html')

        org_repo = c.a_repo
        other_repo = c.cs_repo

        if merge:
            rev1 = msg = None
            if not c.cs_ranges:
                msg = _('Cannot show empty diff')
            elif not c.ancestors:
                msg = _('No ancestor found for merge diff')
            elif len(c.ancestors) == 1:
                rev1 = c.ancestors[0]
            else:
                msg = _('Multiple merge ancestors found for merge compare')
            if rev1 is None:
                h.flash(msg, category='error')
                log.error(msg)
                raise HTTPNotFound

            # case we want a simple diff without incoming changesets,
            # previewing what will be merged.
            # Make the diff on the other repo (which is known to have other_rev)
            log.debug('Using ancestor %s as rev1 instead of %s',
                      rev1, c.a_rev)
            org_repo = other_repo
        else: # comparing tips, not necessarily linearly related
            if org_repo != other_repo:
                # TODO: we could do this by using hg unionrepo
                log.error('cannot compare across repos %s and %s', org_repo, other_repo)
                h.flash(_('Cannot compare repositories without using common ancestor'), category='error')
                raise HTTPBadRequest
            rev1 = c.a_rev

        diff_limit = None if fulldiff else self.cut_off_limit

        log.debug('running diff between %s and %s in %s',
                  rev1, c.cs_rev, org_repo.scm_instance.path)
        raw_diff = diffs.get_diff(org_repo.scm_instance, rev1=rev1, rev2=c.cs_rev,
                                      ignore_whitespace=ignore_whitespace_diff,
                                      context=diff_context_size)

        diff_processor = diffs.DiffProcessor(raw_diff, diff_limit=diff_limit)
        c.limited_diff = diff_processor.limited_diff
        c.file_diff_data = []
        c.lines_added = 0
        c.lines_deleted = 0
        # Collect per-file HTML diffs and accumulate the line statistics.
        for f in diff_processor.parsed:
            st = f['stats']
            c.lines_added += st['added']
            c.lines_deleted += st['deleted']
            filename = f['filename']
            fid = h.FID('', filename)
            html_diff = diffs.as_html(enable_comments=False, parsed_lines=[f])
            c.file_diff_data.append((fid, None, f['operation'], f['old_filename'], filename, html_diff, st))

        return render('compare/compare_diff.html')
kallithea/lib/vcs/backends/base.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.base
 
    ~~~~~~~~~~~~~~~~~
 

	
 
    Base for all available scm backends
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import datetime
 
import itertools
 

	
 
from kallithea.lib.vcs.backends import get_backend
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (ChangesetError, EmptyRepositoryError, NodeAlreadyAddedError, NodeAlreadyChangedError, NodeAlreadyExistsError,
 
                                          NodeAlreadyRemovedError, NodeDoesNotExistError, NodeNotChangedError, RepositoryError)
 
from kallithea.lib.vcs.utils import author_email, author_name
 
from kallithea.lib.vcs.utils.helpers import get_dict_for_attrs
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 

	
 

	
 
class BaseRepository(object):
 
    """
 
    Base Repository for final backends
 

	
 
    **Attributes**
 

	
 
        ``DEFAULT_BRANCH_NAME``
 
            name of default branch (i.e. "master" for git etc.)
 

	
 
        ``scm``
 
            alias of scm, i.e. *git* or *hg*
 

	
 
        ``repo``
 
            object from external api
 

	
 
        ``revisions``
 
            list of all available revisions' ids, in ascending order
 

	
 
        ``changesets``
 
            storage dict caching returned changesets
 

	
 
        ``path``
 
            absolute path to the repository
 

	
 
        ``branches``
 
            branches as list of changesets
 

	
 
        ``tags``
 
            tags as list of changesets
 
    """
 
    scm = None
 
    DEFAULT_BRANCH_NAME = None
 
    EMPTY_CHANGESET = '0' * 40
 

	
 
    def __init__(self, repo_path, create=False, **kwargs):
        """
        Initializes repository. Raises RepositoryError if repository could
        not be found at the given ``repo_path`` or directory at ``repo_path``
        exists and ``create`` is set to True.

        The base class only defines the interface and raises
        ``NotImplementedError``.

        :param repo_path: local path of the repository
        :param create=False: if set to True, would try to create repository.
        :param src_url=None: if set, should be proper url from which repository
          would be cloned; requires ``create`` parameter to be set to True -
          raises RepositoryError if src_url is set and create evaluates to
          False (not a named parameter here; presumably passed in ``kwargs``)
        """
        raise NotImplementedError
 

	
 
    def __str__(self):
        """
        Returns ``<ClassName at path>``.
        """
        class_name = self.__class__.__name__
        return '<%s at %s>' % (class_name, self.path)
 

	
 
    def __repr__(self):
        """
        Same representation as ``__str__``.
        """
        return self.__str__()
 

	
 
    def __len__(self):
        """
        Returns the result of ``count()``.
        """
        return self.count()
 

	
 
    def __eq__(self, other):
        """
        Repositories are equal when ``other`` is an instance of this
        repository's class and both have the same ``path``.
        """
        if not isinstance(other, self.__class__):
            return False
        # ``other`` might lack ``path``; treat that as ``None``.
        return getattr(other, 'path', None) == self.path
 

	
 
    def __ne__(self, other):
        """
        Negation of ``__eq__``.
        """
        return not self.__eq__(other)
 

	
 
    @LazyProperty
    def alias(self):
        """
        Returns the key in ``settings.BACKENDS`` whose dotted value ends with
        the name of this class, or ``None`` if there is no such entry.
        """
        class_name = str(self.__class__.__name__)
        for backend_alias, backend_path in settings.BACKENDS.items():
            # Only the last dotted component (the class name) is compared.
            if backend_path.rsplit('.', 1)[-1] == class_name:
                return backend_alias
        return None
 

	
 
    @LazyProperty
    def name(self):
        """
        Return repository name (without group name)

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def owner(self):
        """
        Owner of the repository.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def description(self):
        """
        Description of the repository.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def size(self):
        """
        Returns combined size in bytes for all repository files, as found in
        the changeset returned by ``get_changeset()``.

        A ``RepositoryError`` while walking is ignored; whatever was summed up
        until then (possibly 0) is returned.
        """
        total = 0
        try:
            tip = self.get_changeset()
            for _topnode, _dirs, filenodes in tip.walk('/'):
                for filenode in filenodes:
                    total += tip.get_file_size(filenode.path)
        except RepositoryError:
            # Best effort: keep the partial sum.
            pass
        return total
 

	
 
    def is_valid(self):
        """
        Validates repository.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def is_empty(self):
        """
        Returns the ``_empty`` attribute.

        NOTE(review): ``_empty`` is not defined in this base class; presumably
        the backends provide it - confirm.
        """
        return self._empty
 

	
 
    #==========================================================================
 
    # CHANGESETS
 
    #==========================================================================
 

	
 
    def get_changeset(self, revision=None):
        """
        Returns instance of ``Changeset`` class. If ``revision`` is None, most
        recent changeset is returned.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param revision: identifier of the wanted changeset, or None
        :raises ``EmptyRepositoryError``: if there are no revisions
        """
        raise NotImplementedError
 

	
 
    def __iter__(self):
        """
        Allows Repository objects to be iterated: yields one changeset per
        entry of ``self.revisions``, in that order.

        *Requires* ``revisions`` and an implementation of ``get_changeset``.
        """
        for rev in self.revisions:
            changeset = self.get_changeset(rev)
            yield changeset
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
                       end_date=None, branch_name=None, reverse=False, max_revisions=None):
        """
        Returns iterator of ``BaseChangeset`` objects from start to end,
        both inclusive.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param start: None or str
        :param end: None or str
        :param start_date:
        :param end_date:
        :param branch_name:
        :param reverse:
        :param max_revisions: presumably an upper bound on the number of
            changesets returned - confirm against the backends
        """
        raise NotImplementedError
 

	
 
    def get_diff_changesets(self, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from this repo @org_rev
        to other_repo @other_rev
        ... and the other way
        ... and the ancestors that would be used for merge

        Not implemented in the base class; raises ``NotImplementedError``.

        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want our compare to be made on other_repo
        """
        raise NotImplementedError
 

	
 
    def __getitem__(self, key):
        """
        Returns the changeset for ``key``, or - when ``key`` is a slice - a
        generator of the changesets for ``self.revisions[key]``.
        """
        if not isinstance(key, slice):
            return self.get_changeset(key)
        # The slice is taken right away; changesets are created on demand.
        selected = self.revisions[key]
        return (self.get_changeset(rev) for rev in selected)
 

	
 
    def count(self):
        """
        Returns the number of entries in ``self.revisions``.
        """
        return len(self.revisions)
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None, **opts):
        """
        Creates and returns a tag for the given ``revision``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param name: name for new tag
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param revision: changeset id for which new tag would be created
        :param message: message of the tag's commit
        :param date: date of tag's commit

        :raises TagAlreadyExistError: if tag with same name already exists
        """
        raise NotImplementedError
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
        """
        Removes tag with the given ``name``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param name: name of the tag to be removed
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param message: message of the tag's removal commit
        :param date: date of tag's removal commit

        :raises TagDoesNotExistError: if tag with given name does not exist
        """
        raise NotImplementedError
 

	
 
    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
            context=3):
        """
        Returns (git like) *diff*, as plain text. Shows changes introduced by
        ``rev2`` since ``rev1``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param rev1: Entry point from which diff is shown. Can be
          ``self.EMPTY_CHANGESET`` - in this case, patch showing all
          the changes since empty state of the repository until ``rev2``
        :param rev2: Until which revision changes should be shown.
        :param path: presumably restricts the diff to this path - confirm
          against the backends
        :param ignore_whitespace: If set to ``True``, would not show whitespace
          changes. Defaults to ``False``.
        :param context: How many lines before/after changed lines should be
          shown. Defaults to ``3``.
        """
        raise NotImplementedError
 

	
 
    # ========== #
 
    # COMMIT API #
 
    # ========== #
 

	
 
    @LazyProperty
    def in_memory_changeset(self):
        """
        Returns ``InMemoryChangeset`` object for this repository.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def add(self, filenode, **kwargs):
        """
        Commit api function that will add given ``FileNode`` into this
        repository.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param filenode: the ``FileNode`` to add
        :raises ``NodeAlreadyExistsError``: if there is a file with same path
          already in repository
        :raises ``NodeAlreadyAddedError``: if given node is already marked as
          *added*
        """
        raise NotImplementedError
 

	
 
    def remove(self, filenode, **kwargs):
        """
        Commit api function that will remove given ``FileNode`` from this
        repository.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param filenode: the ``FileNode`` to remove
        :raises ``EmptyRepositoryError``: if there are no changesets yet
        :raises ``NodeDoesNotExistError``: if there is no file with given path
        """
        raise NotImplementedError
 

	
 
    def commit(self, message, **kwargs):
        """
        Persists current changes made on this repository and returns newly
        created changeset.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param message: commit message
        """
        raise NotImplementedError
 

	
 
    def get_state(self):
        """
        Returns dictionary with ``added``, ``changed`` and ``removed`` lists
        containing ``FileNode`` objects.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_config_value(self, section, name, config_file=None):
        """
        Returns configuration value for a given [``section``] and ``name``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param section: Section we want to retrieve value from
        :param name: Name of configuration we want to retrieve
        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError
 

	
 
    def get_user_name(self, config_file=None):
        """
        Returns user's name from global configuration file.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError
 

	
 
    def get_user_email(self, config_file=None):
        """
        Returns user's email from global configuration file.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        raise NotImplementedError
 

	
 
    # =========== #
 
    # WORKDIR API #
 
    # =========== #
 

	
 
    @LazyProperty
    def workdir(self):
        """
        Returns ``Workdir`` instance for this repository.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 

	
 
class BaseChangeset(object):
 
    """
 
    Each backend should implement its changeset representation.
 

	
 
    **Attributes**
 

	
 
        ``repository``
 
            repository object within which changeset exists
 

	
 
        ``raw_id``
 
            raw changeset representation (i.e. full 40 length sha for git
 
            backend)
 

	
 
        ``short_id``
 
            shortened (if applicable) version of ``raw_id``; it would be simple
 
            shortcut for ``raw_id[:12]`` for git/mercurial backends
 

	
 
        ``revision``
 
            revision number as integer
 

	
 
        ``files``
 
            list of ``FileNode`` (``Node`` with NodeKind.FILE) objects
 

	
 
        ``dirs``
 
            list of ``DirNode`` (``Node`` with NodeKind.DIR) objects
 

	
 
        ``nodes``
 
            combined list of ``Node`` objects
 

	
 
        ``author``
 
            author of the changeset, as str
 

	
 
        ``message``
 
            message of the changeset, as str
 

	
 
        ``parents``
 
            list of parent changesets
 

	
 
        ``last``
 
            ``True`` if this is last changeset in repository, ``False``
 
            otherwise; trying to access this attribute while there is no
 
            changesets would raise ``EmptyRepositoryError``
 
    """
 
    def __str__(self):
        """
        Returns ``<ClassName at revision:short_id>``.
        """
        class_name = self.__class__.__name__
        return '<%s at %s:%s>' % (class_name, self.revision, self.short_id)
 

	
 
    def __repr__(self):
        """
        Same representation as ``__str__``.
        """
        return self.__str__()
 

	
 
    def __eq__(self, other):
        """
        Changesets are equal when they are of exactly the same type and have
        the same ``raw_id``.
        """
        # Exact type match is required; subclasses do not compare equal.
        return type(self) is type(other) and self.raw_id == other.raw_id
 

	
 
    def __json__(self, with_file_list=False):
        """
        Returns a dict with ``short_id``, ``raw_id``, ``revision``,
        ``message``, ``date`` and ``author`` of this changeset.

        :param with_file_list: if true, also include ``added``, ``changed``
            and ``removed`` as lists of the paths of the respective nodes
        """
        data = dict(
            short_id=self.short_id,
            raw_id=self.raw_id,
            revision=self.revision,
            message=self.message,
            date=self.date,
            author=self.author,
        )
        if with_file_list:
            data['added'] = [el.path for el in self.added]
            data['changed'] = [el.path for el in self.changed]
            data['removed'] = [el.path for el in self.removed]
        return data
 

	
 
    @LazyProperty
    def last(self):
        """
        ``True`` if ``raw_id`` equals the last entry of the repository's
        ``revisions``.

        :raises ChangesetError: if the changeset has no repository
        """
        if self.repository is not None:
            return self.raw_id == self.repository.revisions[-1]
        raise ChangesetError("Cannot check if it's most recent revision")
 

	
 
    @LazyProperty
    def parents(self):
        """
        Returns list of parent changesets.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def children(self):
        """
        Returns list of child changesets.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def raw_id(self):
        """
        Returns raw string identifying this changeset.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def short_id(self):
        """
        Returns shortened version of ``raw_id`` attribute, as string,
        identifying this changeset, useful for web representation.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def revision(self):
        """
        Returns integer identifying this changeset.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def committer(self):
        """
        Returns Committer for given commit

        Not implemented in the base class; raises ``NotImplementedError``.
        """

        raise NotImplementedError
 

	
 
    @LazyProperty
    def committer_name(self):
        """
        Returns Committer name for given commit, extracted from
        ``self.committer`` with the ``author_name`` helper.
        """

        # ``author_name`` is the module level helper, not the property below.
        return author_name(self.committer)
 

	
 
    @LazyProperty
    def committer_email(self):
        """
        Returns Committer email address for given commit, extracted from
        ``self.committer`` with the ``author_email`` helper.
        """

        # ``author_email`` is the module level helper, not the property below.
        return author_email(self.committer)
 

	
 
    @LazyProperty
    def author(self):
        """
        Returns Author for given commit

        Not implemented in the base class; raises ``NotImplementedError``.
        """

        raise NotImplementedError
 

	
 
    @LazyProperty
    def author_name(self):
        """
        Returns Author name for given commit, extracted from ``self.author``.
        """

        # Inside the method body ``author_name`` resolves to the module level
        # helper function, not to this property.
        return author_name(self.author)
 

	
 
    @LazyProperty
    def author_email(self):
        """
        Returns Author email address for given commit, extracted from
        ``self.author``.
        """

        # Inside the method body ``author_email`` resolves to the module level
        # helper function, not to this property.
        return author_email(self.author)
 

	
 
    def get_file_mode(self, path):
        """
        Returns stat mode of the file at the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_file_content(self, path):
        """
        Returns content of the file at the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_file_size(self, path):
        """
        Returns size of the file at the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_file_changeset(self, path):
        """
        Returns last commit of the file at the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_file_history(self, path):
        """
        Returns history of file as reversed list of ``Changeset`` objects for
        which file at given ``path`` has been modified.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    def get_nodes(self, path):
        """
        Returns combined ``DirNode`` and ``FileNode`` objects list representing
        state of changeset at the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :raises ``ChangesetError``: if node at the given ``path`` is not
          instance of ``DirNode``
        """
        raise NotImplementedError
 

	
 
    def get_node(self, path):
        """
        Returns ``Node`` object from the given ``path``.

        Not implemented in the base class; raises ``NotImplementedError``.

        :raises ``NodeDoesNotExistError``: if there is no node at the given
          ``path``
        """
        raise NotImplementedError
 

	
 
    def fill_archive(self, stream=None, kind='tgz', prefix=None):
        """
        Fills up given stream.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param stream: file like object.
        :param kind: one of following: ``zip``, ``tar``, ``tgz``
            or ``tbz2``. Default: ``tgz``.
        :param prefix: name of root directory in archive.
            Default is repository name and changeset's raw_id joined with dash.

            repo-tip.<kind>
        """

        raise NotImplementedError
 

	
 
    def get_chunked_archive(self, **kwargs):
        """
        Returns iterable archive. Tiny wrapper around ``fill_archive`` method.

        ``fill_archive`` is called with all keyword arguments except
        ``chunk_size``; then the object passed as ``stream`` is read in
        chunks until a read returns nothing.

        :param chunk_size: extra parameter which controls size of returned
            chunks. Default:8k.
        """
        chunk_size = kwargs.pop('chunk_size', 8192)
        # ``stream`` stays in kwargs: fill_archive needs it too.
        stream = kwargs.get('stream')
        self.fill_archive(**kwargs)

        data = stream.read(chunk_size)
        while data:
            yield data
            data = stream.read(chunk_size)
 

	
 
    @LazyProperty
    def root(self):
        """
        Returns ``RootNode`` object for this changeset, i.e. the node at the
        empty path.
        """
        return self.get_node('')
 

	
 
    def next(self, branch=None):
        """
        Returns next changeset from current; if branch is given, it will
        return next changeset belonging to this branch.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param branch: show changesets within the given named branch
        """
        raise NotImplementedError
 

	
 
    def prev(self, branch=None):
        """
        Returns previous changeset from current; if branch is given, it will
        return previous changeset belonging to this branch.

        Not implemented in the base class; raises ``NotImplementedError``.

        :param branch: show changesets within the given named branch
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def added(self):
        """
        Returns list of added ``FileNode`` objects.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def changed(self):
        """
        Returns list of modified ``FileNode`` objects.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def removed(self):
        """
        Returns list of removed ``FileNode`` objects.

        Not implemented in the base class; raises ``NotImplementedError``.
        """
        raise NotImplementedError
 

	
 
    @LazyProperty
    def size(self):
        """
        Returns total number of bytes from contents of all filenodes, i.e.
        the sum of ``size`` over ``get_filenodes_generator()``.
        """
        total = 0
        for filenode in self.get_filenodes_generator():
            total = total + filenode.size
        return total
 

	
 
    def walk(self, topurl=''):
        """
        Similar to os.walk method. Instead of filesystem it walks through
        changeset starting at given ``topurl``.  Returns generator of tuples
        (topnode, dirnodes, filenodes).

        The node at ``topurl`` comes first; then each of its directories is
        walked recursively, in order.
        """
        top = self.get_node(topurl)
        yield (top, top.dirs, top.files)
        for subdir in top.dirs:
            for entry in self.walk(subdir.path):
                yield entry
 

	
 
    def get_filenodes_generator(self):
        """
        Returns generator that yields *all* file nodes, in the order ``walk``
        visits them.
        """
        for _topnode, _dirnodes, filenodes in self.walk():
            for filenode in filenodes:
                yield filenode
 

	
 
    def as_dict(self):
        """
        Returns dictionary with changeset's attributes and their values.

        On top of the plain attributes, ``author`` holds a dict with ``name``
        and ``email``, and ``added``, ``changed`` and ``removed`` hold the
        paths of the respective nodes.
        """
        plain_attrs = ['raw_id', 'short_id', 'revision', 'date', 'message']
        data = get_dict_for_attrs(self, plain_attrs)
        data['author'] = {'name': self.author_name, 'email': self.author_email}
        # Node lists are reduced to their paths.
        for key in ('added', 'changed', 'removed'):
            data[key] = [node.path for node in getattr(self, key)]
        return data
 

	
 
    @LazyProperty
    def closesbranch(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def obsolete(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def bumped(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def divergent(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def extinct(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def unstable(self):
        """
        Always ``False`` in the base implementation.
        """
        return False
 

	
 
    @LazyProperty
    def phase(self):
        """
        Always the empty string in the base implementation.
        """
        return ''
 

	
 

	
 
class BaseWorkdir(object):
    """
    Working directory representation of single repository.

    Apart from ``__init__``, every method of this class only raises
    ``NotImplementedError``; the docstrings describe what an implementation
    is expected to provide.

    :attribute: repository: repository object of working directory
    """

    def __init__(self, repository):
        # Keep a reference to the repository this working directory belongs to.
        self.repository = repository

    def get_branch(self):
        """
        Returns name of current branch.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_changeset(self):
        """
        Returns current changeset.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_added(self):
        """
        Returns list of ``FileNode`` objects marked as *new* in working
        directory.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_changed(self):
        """
        Returns list of ``FileNode`` objects *changed* in working directory.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_removed(self):
        """
        Returns list of ``RemovedFileNode`` objects marked as *removed* in
        working directory.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_untracked(self):
        """
        Returns list of ``FileNode`` objects which are present within working
        directory however are not tracked by repository.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def get_status(self):
        """
        Returns dict with ``added``, ``changed``, ``removed`` and ``untracked``
        lists.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def commit(self, message, **kwargs):
        """
        Commits local (from working directory) changes and returns newly
        created ``Changeset``. Updates repository's ``revisions`` list.

        :param message: commit message
        :raises ``CommitError``: if any error occurs while committing
        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def update(self, revision=None):
        """
        Fetches content of the given revision and populates it within working
        directory.

        :param revision: revision to populate; defaults to ``None``
        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError

    def checkout_branch(self, branch=None):
        """
        Checks out ``branch`` or the backend's default branch.

        Raises ``BranchDoesNotExistError`` if the branch does not exist.

        :raises NotImplementedError: always, in this class
        """
        raise NotImplementedError
 

	
 

	
 
class BaseInMemoryChangeset(object):
    """
    Represents differences between repository's state (most recent head) and
    changes made *in place*.

    **Attributes**

        ``repository``
            repository object for this in-memory-changeset

        ``added``
            list of ``FileNode`` objects marked as *added*

        ``changed``
            list of ``FileNode`` objects marked as *changed*

        ``removed``
            list of ``FileNode`` or ``RemovedFileNode`` objects marked to be
            *removed*

        ``parents``
            list of ``Changeset`` representing parents of in-memory changeset.
            Should always be 2-element sequence.

    """

    def __init__(self, repository):
        self.repository = repository
        self.added = []
        self.changed = []
        self.removed = []
        self.parents = []

    def add(self, *filenodes):
        """
        Marks given ``FileNode`` objects as *to be committed*.

        All nodes are checked before any of them is recorded, so a failing
        call leaves ``added`` untouched.  Whether a node already exists in a
        parent changeset is only verified later, by ``check_integrity``.

        :raises ``NodeAlreadyAddedError``: if node with same path is already
          marked as *added*
        """
        # Check if not already marked as *added* first
        added_paths = {n.path for n in self.added}
        for node in filenodes:
            if node.path in added_paths:
                raise NodeAlreadyAddedError("Such FileNode %s is already "
                    "marked for addition" % node.path)
        self.added.extend(filenodes)

    def change(self, *filenodes):
        """
        Marks given ``FileNode`` objects to be *changed* in next commit.

        Whether the node exists in a parent and really differs from it is
        only verified later, by ``check_integrity``.

        :raises ``EmptyRepositoryError``: if there are no changesets yet
        :raises ``NodeAlreadyChangedError``: if node with same path is already
          marked to be *changed*
        :raises ``NodeAlreadyRemovedError``: if node with same path is already
          marked to be *removed*
        """
        removed_paths = {n.path for n in self.removed}
        for node in filenodes:
            if node.path in removed_paths:
                raise NodeAlreadyRemovedError("Node at %s is already marked "
                    "as removed" % node.path)
        try:
            self.repository.get_changeset()
        except EmptyRepositoryError:
            raise EmptyRepositoryError("Nothing to change - try to *add* new "
                "nodes rather than changing them")
        # Tracked incrementally so that duplicates inside ``filenodes`` are
        # detected as well.
        changed_paths = {n.path for n in self.changed}
        for node in filenodes:
            if node.path in changed_paths:
                raise NodeAlreadyChangedError("Node at '%s' is already "
                    "marked as changed" % node.path)
            changed_paths.add(node.path)
            self.changed.append(node)

    def remove(self, *filenodes):
        """
        Marks given ``FileNode`` (or ``RemovedFileNode``) objects to be
        *removed* in next commit.

        :raises ``NodeAlreadyRemovedError``: if node has been already marked to
          be *removed*
        :raises ``NodeAlreadyChangedError``: if node has been already marked to
          be *changed*
        """
        removed_paths = {n.path for n in self.removed}
        changed_paths = {n.path for n in self.changed}
        for node in filenodes:
            if node.path in removed_paths:
                raise NodeAlreadyRemovedError("Node is already marked to "
                    "for removal at %s" % node.path)
            if node.path in changed_paths:
                raise NodeAlreadyChangedError("Node is already marked to "
                    "be changed at %s" % node.path)
            # We only mark node as *removed* - real removal is done by
            # commit method
            removed_paths.add(node.path)
            self.removed.append(node)

    def reset(self):
        """
        Resets this instance to initial state (cleans ``added``, ``changed``,
        ``removed`` and ``parents`` lists).
        """
        self.added = []
        self.changed = []
        self.removed = []
        self.parents = []

    def get_ipaths(self):
        """
        Returns generator of paths from nodes marked as added, changed or
        removed.
        """
        for node in itertools.chain(self.added, self.changed, self.removed):
            yield node.path

    def get_paths(self):
        """
        Returns list of paths from nodes marked as added, changed or removed.
        """
        return list(self.get_ipaths())

    def check_integrity(self, parents=None):
        """
        Checks in-memory changeset's integrity. Also, sets parents if not
        already set.

        When ``self.parents`` is empty it is filled from ``parents`` (padded
        with ``None`` to two elements); if ``parents`` is empty too, the
        repository's current changeset is used, or ``[None, None]`` for an
        empty repository.  The list passed in by the caller is not modified.

        :param parents: optional sequence of parent changesets
        :raises ``NodeAlreadyExistsError``: if an *added* node already exists
          in one of the parents
        :raises ``NodeDoesNotExistError``: if a *changed* node is found in
          none of the parents
        :raises ``NodeNotChangedError``: if the content of a *changed* node
          differs from none of the parents that contain it
        """
        if not self.parents:
            # Work on a copy: padding below must not leak into the caller's list
            parents = list(parents or [])
            if len(parents) == 0:
                try:
                    parents = [self.repository.get_changeset(), None]
                except EmptyRepositoryError:
                    parents = [None, None]
            elif len(parents) == 1:
                parents.append(None)
            self.parents = parents

        # Local parents, only if not None
        parents = [p for p in self.parents if p]

        # Check nodes marked as added
        for p in parents:
            for node in self.added:
                try:
                    p.get_node(node.path)
                except NodeDoesNotExistError:
                    pass
                else:
                    raise NodeAlreadyExistsError("Node at %s already exists "
                        "at %s" % (node.path, p))

        # Check nodes marked as changed
        missing = set(node.path for node in self.changed)
        not_changed = set(node.path for node in self.changed)
        if self.changed and not parents:
            raise NodeDoesNotExistError(self.changed[0].path)
        for p in parents:
            for node in self.changed:
                try:
                    old = p.get_node(node.path)
                    # discard(), not remove(): with two parents the same path
                    # may be found (and differ) in both of them
                    missing.discard(node.path)
                    # if content actually changed, remove node from unchanged
                    if old.content != node.content:
                        not_changed.discard(node.path)
                except NodeDoesNotExistError:
                    pass
        if self.changed and missing:
            # Report a path that really is missing, not the last loop variable
            raise NodeDoesNotExistError("Node at %s is missing "
                "(parents: %s)" % (min(missing), parents))

        if self.changed and not_changed:
            raise NodeNotChangedError("Node at %s wasn't actually changed "
                "since parents' changesets: %s" % (min(not_changed),
                    parents)
            )
 

	
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.git.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Git repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import errno
 
import logging
 
import os
 
import re
 
import time
 
import urllib.error
 
import urllib.parse
 
import urllib.request
 
from collections import OrderedDict
 

	
 
import mercurial.util  # import url as hg_url
 
from dulwich.client import SubprocessGitClient
 
from dulwich.config import ConfigFile
 
from dulwich.objects import Tag
 
from dulwich.repo import NotGitRepository, Repo
 
from dulwich.server import update_server_info
 

	
 
from kallithea.lib.vcs import subprocessio
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.conf import settings
 
from kallithea.lib.vcs.exceptions import (BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError,
 
                                          TagDoesNotExistError)
 
from kallithea.lib.vcs.utils import ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_str
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, date_fromtimestamp, makedate, safe_bytes, safe_str
 
from kallithea.lib.vcs.utils.helpers import get_urllib_request_handlers
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.paths import abspath, get_user_home
 

	
 
from .changeset import GitChangeset
 
from .inmemory import GitInMemoryChangeset
 
from .workdir import GitWorkdir
 

	
 

	
 
# Matches a string consisting of exactly 12 or exactly 40 hexadecimal digits.
SHA_PATTERN = re.compile(r'^([0-9a-fA-F]{12}|[0-9a-fA-F]{40})$')

# Logger named after this module.
log = logging.getLogger(__name__)
 

	
 

	
 
class GitRepository(BaseRepository):
 
    """
 
    Git repository backend.
 
    """
 
    DEFAULT_BRANCH_NAME = 'master'
 
    scm = 'git'
 

	
 
    def __init__(self, repo_path, create=False, src_url=None,
                 update_after_clone=False, bare=False):
        """
        Set up the repository located at ``repo_path``.

        ``create``, ``src_url``, ``update_after_clone`` and ``bare`` are
        handed over to ``_get_repo()``.  ``self.bare`` is taken from the
        resulting repository object, not from the ``bare`` argument.
        """
        self.path = abspath(repo_path)
        repo = self._get_repo(create, src_url, update_after_clone, bare)
        self.repo = repo
        self.bare = repo.bare
 

	
 
    @property
    def _config_files(self):
        """
        List of two config file paths: the repository's own ``config``
        (directly under ``self.path`` when ``self.bare`` is set, under
        ``.git`` otherwise) followed by ``.gitconfig`` in the directory
        returned by ``get_user_home()``.
        """
        # Explicit branch instead of the ``cond and a or b`` pseudo-ternary,
        # which silently picks ``b`` whenever ``a`` happens to be falsy.
        if self.bare:
            repo_config = abspath(self.path, 'config')
        else:
            repo_config = abspath(self.path, '.git', 'config')
        return [
            repo_config,
            abspath(get_user_home(), '.gitconfig'),
        ]
 

	
 
    @property
    def _repo(self):
        """
        Alias for ``self.repo``.
        """
        return self.repo
 

	
 
    @property
 
    def head(self):
 
        try:
 
            return self._repo.head()
 
        except KeyError:
 
            return None
 

	
 
    @property
 
    def _empty(self):
 
        """
 
        Checks if repository is empty ie. without any changesets
 
        """
 

	
 
        try:
 
            self.revisions[0]
 
        except (KeyError, IndexError):
 
            return True
 
        return False
 

	
 
    @LazyProperty
    def revisions(self):
        """
        Returns list of revisions' ids, in ascending order.  Being lazy
        attribute allows external tools to inject shas from cache.

        The value is whatever ``_get_all_revisions()`` returns.
        """
        return self._get_all_revisions()
 

	
 
    @classmethod
    def _run_git_command(cls, cmd, cwd=None):
        """
        Runs given ``cmd`` as git command and returns output bytes in a tuple
        (stdout, stderr) ... or raise RepositoryError.

        The command is run with a copy of the current environment in which
        ``GIT_DIR`` is removed and ``GIT_CONFIG_NOGLOBAL`` is set to ``'1'``,
        and is prefixed with the configured git executable and
        ``-c core.quotepath=false``.

        :param cmd: git command to be executed, as a list of arguments
        :param cwd: passed directly to subprocess
        :raises RepositoryError: if the subprocess could not be run; the
          original ``OSError`` is attached as ``__cause__``
        """
        # need to clean fix GIT_DIR !
        gitenv = dict(os.environ)
        gitenv.pop('GIT_DIR', None)
        gitenv['GIT_CONFIG_NOGLOBAL'] = '1'

        assert isinstance(cmd, list), cmd
        cmd = [settings.GIT_EXECUTABLE_PATH, '-c', 'core.quotepath=false'] + cmd
        try:
            p = subprocessio.SubprocessIOChunker(cmd, cwd=cwd, env=gitenv, shell=False)
        except OSError as err:  # EnvironmentError is an alias of OSError on Python 3
            # output from the failing process is in str(EnvironmentError)
            msg = ("Couldn't run git command %s.\n"
                   "Subprocess failed with '%s': %s\n" %
                   (cmd, type(err).__name__, err)
            ).strip()
            log.error(msg)
            # keep the low-level error reachable for debugging
            raise RepositoryError(msg) from err

        try:
            stdout = b''.join(p.output)
            stderr = b''.join(p.error)
        finally:
            p.close()
        # TODO: introduce option to make commands fail if they have any stderr output?
        if stderr:
            log.debug('stderr from %s:\n%s', cmd, stderr)
        else:
            log.debug('stderr from %s: None', cmd)
        return stdout, stderr
 

	
 
    def run_git_command(self, cmd):
        """
        Run ``cmd`` through ``_run_git_command()`` and return its stdout
        converted with ``safe_str``; stderr is discarded.

        The working directory is ``self.path`` when that is an existing
        directory, otherwise no working directory is passed.
        RepositoryError raised by ``_run_git_command()`` propagates.
        """
        cwd = self.path if os.path.isdir(self.path) else None
        stdout, _stderr = self._run_git_command(cmd, cwd=cwd)
        return safe_str(stdout)
 

	
 
    @classmethod
    def _check_url(cls, url):
        """
        Function will check given url and try to verify if it's a valid
        link. Sometimes it may happened that git will issue basic
        auth request that can cause whole API to hang when used from python
        or other external calls.

        Local directories and ``file:`` / ``git://`` urls are accepted
        without any further check.  For everything else ``info/refs`` is
        requested with ``service=git-upload-pack`` and the response body is
        inspected.

        :returns: ``True`` when the url is accepted
        :raises urllib.error.URLError: if the request fails, the return code
          is not 200, or the response does not look like a git repository
        """
        # check first if it's not an local url
        if os.path.isdir(url) or url.startswith('file:'):
            return True

        if url.startswith('git://'):
            return True

        # Strip a "<something>+" prefix from the scheme.  Only done when a
        # scheme separator is actually present: str.find() returning -1 would
        # otherwise make the slice cover (almost) the whole url.
        scheme, separator, _rest = url.partition('://')
        if separator and '+' in scheme:
            url = url[url.find('+') + 1:]

        url_obj = mercurial.util.url(safe_bytes(url))
        test_uri, handlers = get_urllib_request_handlers(url_obj)
        if not test_uri.endswith(b'info/refs'):
            test_uri = test_uri.rstrip(b'/') + b'/info/refs'

        # never leak the password into error messages
        url_obj.passwd = b'*****'
        cleaned_uri = str(url_obj)

        o = urllib.request.build_opener(*handlers)
        o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git

        req = urllib.request.Request(
            "%s?%s" % (
                safe_str(test_uri),
                urllib.parse.urlencode({"service": 'git-upload-pack'})
            ))

        try:
            resp = o.open(req)
        except Exception as e:
            # means it cannot be cloned
            raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e)) from e

        # Make sure the response is closed whatever happens below
        try:
            if resp.code != 200:
                raise urllib.error.URLError(
                    "[%s] org_exc: %s" % (cleaned_uri, 'Return Code is not 200'))
            gitdata = resp.read()
        finally:
            resp.close()

        # now detect if it's proper git repo
        if b'service=git-upload-pack' not in gitdata:
            raise urllib.error.URLError(
                "url [%s] does not look like an git" % cleaned_uri)

        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False,
                  bare=False):
        """
        Return the ``Repo`` object for ``self.path``.

        Without ``create`` the existing repository is opened.  With
        ``create`` and ``src_url`` the url is checked and cloned first; with
        ``create`` alone the directory is made and initialized (bare or not).

        :raises RepositoryError: if ``create`` is set but the path exists, if
          ``src_url`` is given without ``create``, or if ``NotGitRepository``
          / ``OSError`` is raised while opening or creating
        """
        if create and os.path.exists(self.path):
            raise RepositoryError("Location already exist")
        if src_url and not create:
            raise RepositoryError("Create should be set to True if src_url is "
                                  "given (clone operation creates repository)")
        try:
            if not create:
                return Repo(self.path)
            if src_url:
                GitRepository._check_url(src_url)
                self.clone(src_url, update_after_clone, bare)
                return Repo(self.path)
            os.makedirs(self.path)
            initialize = Repo.init_bare if bare else Repo.init
            return initialize(self.path)
        except (NotGitRepository, OSError) as err:
            raise RepositoryError(err)
 

	
 
    def _get_all_revisions(self):
        """
        Return the lines printed by ``git rev-list <GIT_REV_FILTER> --reverse
        --date-order``, or an empty list when the repository has no head or
        the command fails with RepositoryError.
        """
        # Asking for the head first is cheaper than letting the subprocess
        # fail on an empty repository.
        try:
            self._repo.head()
        except KeyError:
            return []

        command = ['rev-list', settings.GIT_REV_FILTER, '--reverse', '--date-order']
        try:
            output = self.run_git_command(command)
        except RepositoryError:
            # Can be raised for empty repositories
            return []
        return output.splitlines()
 

	
 
    def _get_all_revisions2(self):
        """
        Alternate implementation using dulwich: walk the repository from all
        parsed refs whose type is not ``b'T'`` and return the id of each
        commit visited.
        """
        start_points = [
            ascii_str(sha)
            for sha, type_ in self._parsed_refs.values()
            if type_ != b'T'
        ]
        walker = self._repo.get_walker(include=start_points)
        return [entry.commit.id for entry in walker]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Given any revision identifier, returns a 40 char string with revision hash.
 
        """
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            revision = -1
 

	
 
        if isinstance(revision, int):
 
            try:
 
                return self.revisions[revision]
 
            except IndexError:
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        if isinstance(revision, str):
 
            if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
 
                try:
 
                    return self.revisions[int(revision)]
 
                except IndexError:
 
                    msg = "Revision %r does not exist for %s" % (revision, self)
 
                    raise ChangesetDoesNotExistError(msg)
 

	
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(safe_bytes(revision))
 
            if _ref_revision:  # and _ref_revision[1] in [b'H', b'RH', b'T']:
 
                return ascii_str(_ref_revision[0])
 

	
 
            if revision in self.revisions:
 
                return revision
 

	
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            if revision in self.tags.values():
 
                return revision
 

	
 
            if SHA_PATTERN.match(revision):
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
        """
        Returns the revision that ``ref_name`` resolves to, as returned by
        ``_get_revision(ref_name)``.

        ``ref_type`` is accepted but not used by this implementation.
        """
        return self._get_revision(ref_name)
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
            yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall to
 
        filesystem (``file:///``) schema.
 
        """
 
        if url != 'default' and '://' not in url:
 
            url = ':///'.join(('file', url))
 
        return url
 

	
 
    @LazyProperty
    def name(self):
        """
        Last path component of ``self.path`` (``os.path.basename``).
        """
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
    def last_change(self):
        """
        Last change made on this repository: ``date_fromtimestamp`` applied
        to ``_get_mtime()`` and the second element of ``makedate()``.
        """
        mtime = self._get_mtime()
        offset = makedate()[1]
        return date_fromtimestamp(mtime, offset)
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            idx_loc = '' if self.bare else '.git'
 
            # fallback to filesystem
 
            in_path = os.path.join(self.path, idx_loc, "index")
 
            he_path = os.path.join(self.path, idx_loc, "HEAD")
 
            if os.path.exists(in_path):
 
                return os.stat(in_path).st_mtime
 
            else:
 
                return os.stat(he_path).st_mtime
 

	
 
    @LazyProperty
    def description(self):
        """
        ``self._repo.get_description()`` converted with ``safe_str``;
        ``b'unknown'`` is used when the description is falsy.
        """
        return safe_str(self._repo.get_description() or b'unknown')
 

	
 
    @LazyProperty
    def contact(self):
        """
        Contact of the repository; this implementation always gives
        ``'Unknown'``.
        """
        return 'Unknown'
 

	
 
    @property
    def branches(self):
        """
        Ordered mapping of branch name to sha, sorted by name, built from
        the parsed refs of type ``b'H'``.  An empty dict is returned when
        there are no revisions.
        """
        if not self.revisions:
            return {}
        heads = []
        for ref_name, (sha, type_) in self._parsed_refs.items():
            if type_ == b'H':
                heads.append((safe_str(ref_name), ascii_str(sha)))
        heads.sort(key=lambda pair: pair[0])
        return OrderedDict(heads)
 

	
 
    @LazyProperty
    def closed_branches(self):
        """
        Always an empty dict in this implementation.
        """
        return {}
 

	
 
    @LazyProperty
    def tags(self):
        """
        Tags of this repository, as returned by ``_get_tags()``.
        """
        return self._get_tags()
 

	
 
    def _get_tags(self):
        """
        Ordered mapping of tag name to sha, sorted by name in descending
        order, built from the parsed refs of type ``b'T'``.  An empty dict is
        returned when there are no revisions.
        """
        if not self.revisions:
            return {}
        found = []
        for ref_name, (sha, type_) in self._parsed_refs.items():
            if type_ == b'T':
                found.append((safe_str(ref_name), ascii_str(sha)))
        found.sort(key=lambda pair: pair[0], reverse=True)
        return OrderedDict(found)
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
            **kwargs):
        """
        Creates a tag for the given ``revision`` and returns the tagged
        changeset.

        The tag is written as the ref ``refs/tags/<name>`` pointing at the
        changeset's commit id; afterwards the parsed refs and ``tags`` are
        refreshed.  ``user``, ``message``, ``date`` and extra keyword
        arguments are accepted but not used by this implementation.

        :param name: name for new tag
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param revision: changeset id for which new tag would be created
        :param message: message of the tag's commit
        :param date: date of tag's commit

        :raises TagAlreadyExistError: if tag with same name already exists
        """
        if name in self.tags:
            raise TagAlreadyExistError("Tag %s already exists" % name)
        changeset = self.get_changeset(revision)
        self._repo.refs[b"refs/tags/%s" % safe_bytes(name)] = changeset._commit.id

        # refresh the cached views of the refs
        self._parsed_refs = self._get_parsed_refs()
        self.tags = self._get_tags()
        return changeset
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
        """
        Removes tag with the given ``name`` by deleting its file below
        ``refs/tags`` and refreshing the parsed refs and ``tags``.

        :param name: name of the tag to be removed
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param message: message of the tag's removal commit
        :param date: date of tag's removal commit

        :raises TagDoesNotExistError: if tag with given name does not exists
        :raises RepositoryError: if an ``OSError`` occurs while removing
        """
        if name not in self.tags:
            raise TagDoesNotExistError("Tag %s does not exist" % name)
        # self._repo.refs is a DiskRefsContainer, and .path gives the full absolute path of '.git'
        git_dir = safe_str(self._repo.refs.path)
        tag_file = os.path.join(git_dir, 'refs', 'tags', name)
        try:
            os.remove(tag_file)
            self._parsed_refs = self._get_parsed_refs()
            self.tags = self._get_tags()
        except OSError as error:
            raise RepositoryError(error.strerror)
 

	
 
    @LazyProperty
    def bookmarks(self):
        """
        Gets bookmarks for this repository; always an empty dict in this
        implementation.
        """
        return {}
 

	
 
    @LazyProperty
    def _parsed_refs(self):
        """
        Parsed refs of this repository, as returned by ``_get_parsed_refs()``.
        """
        return self._get_parsed_refs()
 

	
 
    def _get_parsed_refs(self):
 
        """Return refs as a dict, like:
 
        { b'v0.2.0': [b'599ba911aa24d2981225f3966eb659dfae9e9f30', b'T'] }
 
        """
 
        _repo = self._repo
 
        refs = _repo.get_refs()
 
        keys = [(b'refs/heads/', b'H'),
 
                (b'refs/remotes/origin/', b'RH'),
 
                (b'refs/tags/', b'T')]
 
        _refs = {}
 
        for ref, sha in refs.items():
 
            for k, type_ in keys:
 
                if ref.startswith(k):
 
                    _key = ref[len(k):]
 
                    if type_ == b'T':
 
                        obj = _repo.get_object(sha)
 
                        if isinstance(obj, Tag):
 
                            sha = _repo.get_object(sha).object[1]
 
                    _refs[_key] = [sha, type_]
 
                    break
 
        return _refs
 

	
 
    def _heads(self, reverse=False):
 
        refs = self._repo.get_refs()
 
        heads = {}
 

	
 
        for key, val in refs.items():
 
            for ref_key in [b'refs/heads/', b'refs/remotes/origin/']:
 
                if key.startswith(ref_key):
 
                    n = key[len(ref_key):]
 
                    if n not in [b'HEAD']:
 
                        heads[n] = val
 

	
 
        return heads if reverse else dict((y, x) for x, y in heads.items())
 

	
 
    def get_changeset(self, revision=None):
        """
        Return the ``GitChangeset`` for ``revision`` (resolved through
        ``_get_revision``, so ``None`` means the most recent commit).  A
        ``GitChangeset`` passed in is handed back unchanged.
        """
        if not isinstance(revision, GitChangeset):
            resolved = self._get_revision(revision)
            revision = GitChangeset(repository=self, revision=resolved)
        return revision
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
           end_date=None, branch_name=None, reverse=False, max_revisions=None):
        """
        Returns iterator of ``GitChangeset`` objects from start to end (both
        are inclusive), in ascending date order (unless ``reverse`` is set).

        :param start: changeset ID, as str; first returned changeset
        :param end: changeset ID, as str; last returned changeset
        :param start_date: if specified, changesets with commit date less than
          ``start_date`` would be filtered out from returned set
        :param end_date: if specified, changesets with commit date greater than
          ``end_date`` would be filtered out from returned set
        :param branch_name: if specified, changesets not reachable from given
          branch would be filtered out from returned set
        :param reverse: if ``True``, returned generator would be reversed
          (meaning that returned changesets would have descending date order)
        :param max_revisions: if truthy, passed to ``git log`` as
          ``--max-count``

        :returns: ``CollectionGenerator`` over the selected revision hashes

        :raise BranchDoesNotExistError: If given ``branch_name`` does not
            exist.
        :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
          ``end`` could not be found.
        :raise EmptyRepositoryError: If the repository has no changesets.
        :raise RepositoryError: If both ``start`` and ``end`` are given and
          ``start`` is positioned after ``end``.

        """
        if branch_name and branch_name not in self.branches:
            raise BranchDoesNotExistError("Branch '%s' not found"
                                          % branch_name)
        # actually we should check now if it's not an empty repo to not spawn
        # subprocess commands
        if self._empty:
            raise EmptyRepositoryError("There are no changesets yet")

        # %H at format means (full) commit hash, initial hashes are retrieved
        # in ascending date order
        cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
        if max_revisions:
            cmd += ['--max-count=%s' % max_revisions]
        if start_date:
            cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
        if end_date:
            cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
        if branch_name:
            cmd.append(branch_name)
        else:
            cmd.append(settings.GIT_REV_FILTER)

        revs = self.run_git_command(cmd).splitlines()
        # Default window: the whole list
        start_pos = 0
        end_pos = len(revs)
        # NOTE(review): ``start`` is only honored when truthy, while ``end``
        # is compared against None
        if start:
            _start = self._get_revision(start)
            try:
                start_pos = revs.index(_start)
            except ValueError:
                # resolved revision is not in the listed revisions: keep default
                pass

        if end is not None:
            _end = self._get_revision(end)
            try:
                end_pos = revs.index(_end)
            except ValueError:
                # resolved revision is not in the listed revisions: keep default
                pass

        if None not in [start, end] and start_pos > end_pos:
            raise RepositoryError('start cannot be after end')

        # NOTE(review): end_pos is always an int here, so this check always
        # passes; the increment makes the end of the slice inclusive
        if end_pos is not None:
            end_pos += 1

        revs = revs[start_pos:end_pos]
        if reverse:
            revs.reverse()

        return CollectionGenerator(self, revs)
 

	
 
    def get_diff_changesets(self, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from this repo @org_rev
        to other_repo @other_rev
        ... and the other way
        ... and the ancestors that would be used for merge

        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want out compare to be made on other_repo

        :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
          ``org_changesets`` is always an empty list here and ``ancestors``
          is ``None`` when both revisions are equal
        """
        org_changesets = []
        ancestors = None
        if org_rev == other_rev:
            other_changesets = []
        elif self != other_repo:
            # Both repositories are opened only for the duration of the
            # comparison; try/finally makes sure they are closed even when
            # fetching or walking fails.
            gitrepo = Repo(self.path)
            try:
                SubprocessGitClient(thin_packs=False).fetch(other_repo.path, gitrepo)

                gitrepo_remote = Repo(other_repo.path)
                try:
                    SubprocessGitClient(thin_packs=False).fetch(self.path, gitrepo_remote)

                    revs = [
                        ascii_str(x.commit.id)
                        for x in gitrepo_remote.get_walker(include=[ascii_bytes(other_rev)],
                                                           exclude=[ascii_bytes(org_rev)])
                    ]
                    other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
                    if other_changesets:
                        ancestors = [other_changesets[0].parents[0].raw_id]
                    else:
                        # no changesets from other repo, ancestor is the other_rev
                        ancestors = [other_rev]
                finally:
                    gitrepo_remote.close()
            finally:
                gitrepo.close()

        else:
            so = self.run_git_command(
                ['log', '--reverse', '--pretty=format:%H',
                 '-s', '%s..%s' % (org_rev, other_rev)]
            )
            other_changesets = [self.get_changeset(cs)
                          for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
            so = self.run_git_command(
                ['merge-base', org_rev, other_rev]
            )
            ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]

        return other_changesets, org_changesets, ancestors
 

	
 
    def get_diff(self, rev1, rev2, path=None, ignore_whitespace=False,
                 context=3):
        """
        Produce a (git like) diff, as plain bytes, of the changes
        introduced by ``rev2`` since ``rev1``.

        :param rev1: Revision the diff starts from. When it equals
          ``self.EMPTY_CHANGESET``, ``git show`` of ``rev2`` is used
          instead of ``git diff``.
        :param rev2: Revision the diff ends at.
        :param path: When given, the diff is limited to this path.
        :param ignore_whitespace: When true, ``-w`` is passed to git.
          Defaults to ``False``.
        :param context: Number of context lines around changes. Defaults
          to ``3``. Values of ``2**31`` and above are replaced by
          ``2**31-1`` (``2147483647``); negative values are replaced
          by ``0``.
        """
        # Git internally uses a signed long int for storing context
        # size, so the requested context is capped one below the value
        # that would overflow. Negative values make no sense and would
        # result in errors, so they are raised to zero.
        overflow_limit = 2**31
        if context >= overflow_limit:
            context = overflow_limit - 1
        elif context < 0:
            context = 0

        git_opts = ['-U%s' % context, '--full-index', '--binary', '-p', '-M', '--abbrev=40']
        if ignore_whitespace:
            git_opts.append('-w')

        # Accept changeset-like objects as well as plain revision ids
        if hasattr(rev1, 'raw_id'):
            rev1 = rev1.raw_id
        if hasattr(rev2, 'raw_id'):
            rev2 = rev2.raw_id

        from_empty = rev1 == self.EMPTY_CHANGESET
        if from_empty:
            end_id = self.get_changeset(rev2).raw_id
            cmd = ['show'] + git_opts + [end_id]
        else:
            start_id = self.get_changeset(rev1).raw_id
            end_id = self.get_changeset(rev2).raw_id
            cmd = ['diff'] + git_opts + [start_id, end_id]

        if path:
            cmd.extend(['--', path])

        stdout, _stderr = self._run_git_command(cmd, cwd=self.path)
        if from_empty:
            # 'show' output starts with a few non-diff lines; drop everything
            # before the first diff header
            _head, marker, remainder = stdout.partition(b'\ndiff ')
            if marker:
                stdout = b'diff ' + remainder
        return stdout
 

	
 
    @LazyProperty
    def in_memory_changeset(self):
        """
        Creates a ``GitInMemoryChangeset`` object bound to this repository.
        """
        changeset = GitInMemoryChangeset(self)
        return changeset
 

	
 
    def clone(self, url, update_after_clone=True, bare=False):
        """
        Clones the repository at ``url`` into ``self.path`` by running
        ``git clone -q``.

        :param url: location to clone from; normalized with ``_get_url``
        :param update_after_clone: When ``False`` (and ``bare`` is not
          set), ``--no-checkout`` is passed so no working directory is
          checked out
        :param bare: When ``True``, ``--bare`` is passed and
          ``update_after_clone`` is not consulted
        """
        source = self._get_url(url)
        args = ['clone', '-q']
        if bare:
            args += ['--bare']
        elif not update_after_clone:
            args += ['--no-checkout']
        args.extend(['--', source, self.path])
        # If error occurs run_git_command raises RepositoryError already
        self.run_git_command(args)
 

	
 
    def pull(self, url):
        """
        Runs ``git pull --ff-only`` against the external location ``url``
        (normalized with ``_get_url``).
        """
        remote = self._get_url(url)
        # If error occurs run_git_command raises RepositoryError already
        self.run_git_command(['pull', '--ff-only', remote])
 

	
 
    def fetch(self, url):
        """
        Tries to fetch the heads of an external location.

        Every ref listed by ``git ls-remote -h`` is fetched with a
        ``+ref:ref`` refspec, i.e. into a local ref of the same name.

        :param url: location to fetch from; normalized with ``_get_url``
        """
        url = self._get_url(url)
        so = self.run_git_command(['ls-remote', '-h', url])
        cmd = ['fetch', url, '--']
        for line in so.splitlines():
            if not line:
                # tolerate stray blank lines instead of failing on the unpack
                continue
            # each line is "<sha>\t<ref>"; only the ref name is needed
            _sha, ref = line.split('\t')
            cmd.append('+%s:%s' % (ref, ref))
        self.run_git_command(cmd)
 

	
 
    def _update_server_info(self):
        """
        Runs ``update_server_info`` on this repo instance. An ``OSError``
        with errno ``ENOENT`` or ``EROFS`` is logged and ignored; any other
        ``OSError`` is re-raised.
        """
        tolerated = (errno.ENOENT, errno.EROFS)
        try:
            update_server_info(self._repo)
        except OSError as e:
            if e.errno in tolerated:
                # Workaround for dulwich crashing on for example its own dulwich/tests/data/repos/simple_merge.git/info/refs.lock
                log.error('Ignoring %s running update-server-info: %s', type(e).__name__, e)
            else:
                raise
 

	
 
    @LazyProperty
    def workdir(self):
        """
        Creates the ``GitWorkdir`` instance for this repository.
        """
        git_workdir = GitWorkdir(self)
        return git_workdir
 

	
 
    def get_config_value(self, section, name, config_file=None):
        """
        Looks up ``name`` in [``section``] of the configuration files.

        The files in ``config_file`` are tried first, then those in
        ``self._config_files``. Files that cannot be read or parsed, and
        files without the requested key, are skipped; the first hit wins.

        :param section: Section we want to retrieve value from
        :param name: Name of configuration we want to retrieve
        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        :returns: the value passed through ``safe_str``, or ``None`` when
          the value is ``None`` or no file provides it
        """
        if config_file is None:
            extra_files = []
        elif isinstance(config_file, str):
            extra_files = [config_file]
        else:
            extra_files = config_file

        for path in extra_files + self._config_files:
            try:
                config = ConfigFile.from_path(path)
            except (IOError, OSError, ValueError):
                continue
            try:
                value = config.get(section, name)
            except KeyError:
                continue
            if value is None:
                return None
            return safe_str(value)
        return None
 

	
 
    def get_user_name(self, config_file=None):
        """
        Returns the ``name`` value of the ``user`` configuration section,
        as looked up by ``get_config_value``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        user_name = self.get_config_value('user', 'name', config_file)
        return user_name
 

	
 
    def get_user_email(self, config_file=None):
        """
        Returns the ``email`` value of the ``user`` configuration section,
        as looked up by ``get_config_value``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        """
        user_email = self.get_config_value('user', 'email', config_file)
        return user_email
kallithea/lib/vcs/backends/hg/repository.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
"""
 
    vcs.backends.hg.repository
 
    ~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
    Mercurial repository implementation.
 

	
 
    :created_on: Apr 8, 2010
 
    :copyright: (c) 2010-2011 by Marcin Kuzminski, Lukasz Balcerzak.
 
"""
 

	
 
import datetime
 
import logging
 
import os
 
import time
 
import urllib.error
 
import urllib.parse
 
import urllib.request
 
from collections import OrderedDict
 

	
 
import mercurial.commands
 
import mercurial.error
 
import mercurial.exchange
 
import mercurial.hg
 
import mercurial.hgweb
 
import mercurial.httppeer
 
import mercurial.localrepo
 
import mercurial.match
 
import mercurial.mdiff
 
import mercurial.node
 
import mercurial.patch
 
import mercurial.scmutil
 
import mercurial.sshpeer
 
import mercurial.tags
 
import mercurial.ui
 
import mercurial.unionrepo
 
import mercurial.util
 

	
 
from kallithea.lib.vcs.backends.base import BaseRepository, CollectionGenerator
 
from kallithea.lib.vcs.exceptions import (BranchDoesNotExistError, ChangesetDoesNotExistError, EmptyRepositoryError, RepositoryError, TagAlreadyExistError,
 
                                          TagDoesNotExistError, VCSError)
 
from kallithea.lib.vcs.utils import ascii_str, author_email, author_name, date_fromtimestamp, makedate, safe_bytes, safe_str
 
from kallithea.lib.vcs.utils import ascii_bytes, ascii_str, author_email, author_name, date_fromtimestamp, makedate, safe_bytes, safe_str
 
from kallithea.lib.vcs.utils.helpers import get_urllib_request_handlers
 
from kallithea.lib.vcs.utils.lazy import LazyProperty
 
from kallithea.lib.vcs.utils.paths import abspath
 

	
 
from .changeset import MercurialChangeset
 
from .inmemory import MercurialInMemoryChangeset
 
from .workdir import MercurialWorkdir
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
class MercurialRepository(BaseRepository):
 
    """
 
    Mercurial repository backend
 
    """
 
    DEFAULT_BRANCH_NAME = 'default'
 
    scm = 'hg'
 

	
 
    def __init__(self, repo_path, create=False, baseui=None, src_url=None,
                 update_after_clone=False):
        """
        Sets ``path``, ``baseui`` and ``_repo`` for the repository at
        ``repo_path``. Opening, creating or cloning is delegated to
        ``_get_repo``, which raises RepositoryError on failure.

        :param repo_path: local path of the repository
        :param create=False: if set to True, would try to create repository if
           it does not exist rather than raising exception
        :param baseui=None: ui object to use; a fresh ``mercurial.ui.ui()``
           is created when omitted
        :param src_url=None: would try to clone repository from given location
        :param update_after_clone=False: sets update of working copy after
          making a clone
        :raises VCSError: if ``repo_path`` is not a ``str``
        """
        path_is_str = isinstance(repo_path, str)
        if not path_is_str:
            message = ('Mercurial backend requires repository path to '
                       'be instance of <str> got %s instead' %
                       type(repo_path))
            raise VCSError(message)

        self.path = abspath(repo_path)
        self.baseui = baseui if baseui else mercurial.ui.ui()
        # path and ui are in place, so the repository itself can be set up
        self._repo = self._get_repo(create, src_url, update_after_clone)
 

	
 
    @property
    def _empty(self):
        """
        True when the repository has no revisions at all.
        """
        # TODO: Following raises errors when using InMemoryChangeset...
        # return len(self._repo.changelog) == 0
        revision_count = len(self.revisions)
        return revision_count == 0
 

	
 
    @LazyProperty
    def revisions(self):
        """
        List of revision ids in ascending order, as built by
        ``_get_all_revisions``.  Being a lazy attribute allows external
        tools to inject shas from cache.
        """
        all_revisions = self._get_all_revisions()
        return all_revisions
 

	
 
    @LazyProperty
    def name(self):
        """
        Last path component of ``self.path``.
        """
        _parent, base = os.path.split(self.path)
        return base
 

	
 
    @LazyProperty
    def branches(self):
        """
        Branches that are not closed, as returned by ``_get_branches``.
        """
        return self._get_branches(normal=True, closed=False)
 

	
 
    @LazyProperty
    def closed_branches(self):
        """
        Closed branches only, as returned by ``_get_branches``.
        """
        only_closed = self._get_branches(normal=False, closed=True)
        return only_closed
 

	
 
    @LazyProperty
    def allbranches(self):
        """
        Every branch of the repository, closed ones included.
        """
        return self._get_branches(normal=True, closed=True)
 

	
 
    def _get_branches(self, normal=True, closed=False):
        """
        Builds an ordered mapping of branch name to hex node id from the
        sorted entries of the repository's branchmap. An empty repository
        gives an empty dict.

        :param normal: include branches that are not closed
        :param closed: include closed branches
        """
        if self._empty:
            return {}

        selected = OrderedDict()
        branch_entries = sorted(self._repo.branchmap().iterbranches())
        for branch, _heads, node, is_closed in branch_entries:
            # each kind of branch is governed by its own flag
            wanted = closed if is_closed else normal
            if wanted:
                selected[safe_str(branch)] = ascii_str(mercurial.node.hex(node))
        return selected
 

	
 
    @LazyProperty
    def tags(self):
        """
        Tags of this repository, as built by ``_get_tags``.
        """
        tag_map = self._get_tags()
        return tag_map
 

	
 
    def _get_tags(self):
        """
        Builds an ordered mapping of tag name to hex node id, sorted by
        name in descending order. An empty repository gives an empty dict.
        """
        if self._empty:
            return {}

        pairs = [
            (safe_str(tag_name), ascii_str(mercurial.node.hex(node)))
            for tag_name, node in self._repo.tags().items()
        ]
        pairs.sort(key=lambda pair: pair[0], reverse=True)  # sort by name
        return OrderedDict(pairs)
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
            **kwargs):
        """
        Tags ``revision`` with ``name`` and returns the changeset the new
        tag points at.

        :param name: name for new tag
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param revision: changeset id for which new tag would be created
        :param message: message of the tag's commit; a default naming the
          tag and the changeset's short id is used when omitted
        :param date: date of tag's commit; the current time is used when
          omitted
        :param kwargs: only ``local`` is read (default ``False``); it is
          passed on to ``mercurial.tags.tag``

        :raises TagAlreadyExistError: if tag with same name already exists
        :raises RepositoryError: if mercurial aborts the tagging
        """
        if name in self.tags:
            raise TagAlreadyExistError("Tag %s already exists" % name)

        target = self.get_changeset(revision)
        is_local = kwargs.get('local', False)

        if message is None:
            message = "Added tag %s for changeset %s" % (name, target.short_id)
        if date is None:
            now = datetime.datetime.now()
            date = safe_bytes(now.strftime('%a, %d %b %Y %H:%M:%S'))

        try:
            mercurial.tags.tag(self._repo, safe_bytes(name), target._ctx.node(), safe_bytes(message), is_local, safe_bytes(user), date)
        except mercurial.error.Abort as e:
            raise RepositoryError(e.args[0])

        # Reinitialize tags so the new one can be looked up
        self.tags = self._get_tags()
        return self.get_changeset(revision=self.tags[name])
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
        """
        Removes the tag called ``name`` by tagging the null id under that
        name, then refreshes ``self.tags``.

        :param name: name of the tag to be removed
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
        :param message: message of the tag's removal commit; a default
          naming the tag is used when omitted
        :param date: date of tag's removal commit; the current time is
          used when omitted

        :raises TagDoesNotExistError: if tag with given name does not exist
        :raises RepositoryError: if mercurial aborts the operation
        """
        if name not in self.tags:
            raise TagDoesNotExistError("Tag %s does not exist" % name)

        if message is None:
            message = "Removed tag %s" % name
        if date is None:
            now = datetime.datetime.now()
            date = safe_bytes(now.strftime('%a, %d %b %Y %H:%M:%S'))

        try:
            # the removal is never a local tag operation
            mercurial.tags.tag(self._repo, safe_bytes(name), mercurial.commands.nullid, safe_bytes(message), False, safe_bytes(user), date)
            self.tags = self._get_tags()
        except mercurial.error.Abort as e:
            raise RepositoryError(e.args[0])
 

	
 
    @LazyProperty
    def bookmarks(self):
        """
        Bookmarks of this repository, as built by ``_get_bookmarks``.
        """
        bookmark_map = self._get_bookmarks()
        return bookmark_map
 

	
 
    def _get_bookmarks(self):
        """
        Builds an ordered mapping of bookmark name to hex node id, sorted
        by name in descending order. An empty repository gives an empty
        dict.
        """
        if self._empty:
            return {}

        pairs = [
            (safe_str(book_name), ascii_str(mercurial.node.hex(node)))
            for book_name, node in self._repo._bookmarks.items()
        ]
        pairs.sort(key=lambda pair: pair[0], reverse=True)  # sort by name
        return OrderedDict(pairs)
 

	
 
    def _get_all_revisions(self):
        """
        Lists the hex id of every revision in the ``visible`` filtered
        view of the repository, in changelog order.
        """
        visible_revs = self._repo.filtered(b'visible').changelog.revs()
        return [ascii_str(self._repo[rev].hex()) for rev in visible_revs]
 

	
 
    def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
                  context=3):
        """
        Produce a (git like) diff, as bytes, of the changes introduced by
        ``rev2`` since ``rev1``.

        :param rev1: Revision the diff starts from. Can be
          ``self.EMPTY_CHANGESET``, in which case it is not looked up in
          the repository before diffing.
        :param rev2: Revision the diff ends at.
        :param path: When non-empty, only this exact path is diffed.
        :param ignore_whitespace: If set to ``True``, would not show whitespace
          changes. Defaults to ``False``.
        :param context: How many lines before/after changed lines should be
          shown. Defaults to ``3``. Negative values are replaced by ``0``.
        """
        # Negative context values make no sense, and will result in
        # errors. Ensure this does not happen.
        context = max(context, 0)

        # Accept changeset-like objects as well as plain revision ids
        rev1 = getattr(rev1, 'raw_id', rev1)
        rev2 = getattr(rev2, 'raw_id', rev2)

        # Check if given revisions are present at repository (may raise
        # ChangesetDoesNotExistError)
        if rev1 != self.EMPTY_CHANGESET:
            self.get_changeset(rev1)
        self.get_changeset(rev2)

        file_filter = mercurial.match.exact([safe_bytes(path)]) if path else None
        diff_opts = mercurial.mdiff.diffopts(git=True,
                                             showfunc=True,
                                             ignorews=ignore_whitespace,
                                             context=context)
        chunks = mercurial.patch.diff(self._repo, rev1, rev2, match=file_filter,
                                      opts=diff_opts)
        return b''.join(chunks)
 

	
 
    @classmethod
    def _check_url(cls, url, repoui=None):
        """
        Function will check given url and try to verify if it's a valid
        link. Sometimes it may happen that mercurial will issue a basic
        auth request that can cause the whole API to hang when used from
        python or other external calls.

        Local directories and ``file:`` urls are accepted without further
        checks; ``ssh:`` urls are verified by looking up ``tip`` through an
        ssh peer. Any other url is probed with a ``between`` command
        request and, unless it carries a ``prefix+`` scheme prefix, by
        looking up ``tip`` through an http peer.

        :param url: url (or local path) to verify
        :param repoui: ui object for the peers; a fresh ``mercurial.ui.ui()``
          is used when omitted
        :returns: ``True`` when all checks pass
        :raises urllib.error.URLError: if the probe request fails, returns
          a code other than 200, or the http peer lookup fails
        """
        # check first if it's not a local url
        url = safe_bytes(url)
        if os.path.isdir(url) or url.startswith(b'file:'):
            return True

        if url.startswith(b'ssh:'):
            # in case of invalid uri or authentication issues, sshpeer will
            # throw an exception.
            mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
            return True

        # Split off a "prefix+" in front of the scheme (e.g. git+http://...).
        # NOTE(review): when '://' is absent, find() gives -1 and the slice
        # covers everything but the last byte - confirm that is intended.
        url_prefix = None
        if b'+' in url[:url.find(b'://')]:
            url_prefix, url = url.split(b'+', 1)

        url_obj = mercurial.util.url(url)
        test_uri, handlers = get_urllib_request_handlers(url_obj)

        # mask the password; cleaned_uri is only used in error messages
        url_obj.passwd = b'*****'
        cleaned_uri = str(url_obj)

        o = urllib.request.build_opener(*handlers)
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
                        ('Accept', 'application/mercurial-0.1')]

        # probe the server with a 'between' command on the all-zero pair
        req = urllib.request.Request(
            "%s?%s" % (
                safe_str(test_uri),
                urllib.parse.urlencode({
                    'cmd': 'between',
                    'pairs': "%s-%s" % ('0' * 40, '0' * 40),
                })
            ))

        try:
            resp = o.open(req)
            if resp.code != 200:
                # caught right below and re-raised as URLError
                raise Exception('Return Code is not 200')
        except Exception as e:
            # means it cannot be cloned
            raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))

        if not url_prefix: # skip git+http://... etc
            # now check if it's a proper hg repo
            try:
                mercurial.httppeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
            except Exception as e:
                raise urllib.error.URLError(
                    "url [%s] does not look like an hg repo org_exc: %s"
                    % (cleaned_uri, e))

        return True
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
        """
        Opens the mercurial repository at ``self.path`` and returns the
        localrepo object, optionally creating or cloning it first.

        :param create: when true, the repository is created if it does not
          exist; ignored once a clone from ``src_url`` has been made
        :param src_url: when given, the url is verified with ``_check_url``
          and cloned into ``self.path`` before opening
        :param update_after_clone: when false, the clone is made with
          ``noupdate`` so the working copy is not updated
        :raises RepositoryError: if mercurial aborts or reports a repo error
        """
        try:
            if src_url:
                clone_source = safe_bytes(self._get_url(src_url))
                clone_opts = {} if update_after_clone else {'noupdate': True}
                MercurialRepository._check_url(clone_source, self.baseui)
                mercurial.commands.clone(self.baseui, clone_source, safe_bytes(self.path), **clone_opts)

                # Don't try to create if we've already cloned repo
                create = False
            return mercurial.localrepo.instance(self.baseui, safe_bytes(self.path), create=create)
        except (mercurial.error.Abort, mercurial.error.RepoError) as err:
            if create:
                template = "Cannot create repository at %s. Original error was %s"
            else:
                template = "Not valid repository at %s. Original error was %s"
            raise RepositoryError(template % (self.name, err))
 

	
 
    @LazyProperty
    def in_memory_changeset(self):
        """
        Creates a ``MercurialInMemoryChangeset`` bound to this repository.
        """
        changeset = MercurialInMemoryChangeset(self)
        return changeset
 

	
 
    @LazyProperty
    def description(self):
        """
        The ``web.description`` setting of the repository ui, or
        ``unknown`` when it is unset or empty.
        """
        configured = self._repo.ui.config(b'web', b'description', None, untrusted=True)
        if not configured:
            configured = b'unknown'
        return safe_str(configured)
 

	
 
    @LazyProperty
    def contact(self):
        """
        The contact reported by hgweb's ``get_contact`` for the repository
        ui, or ``Unknown`` when it gives nothing.
        """
        found = mercurial.hgweb.common.get_contact(self._repo.ui.config)
        if not found:
            found = b'Unknown'
        return safe_str(found)
 

	
 
    @LazyProperty
    def last_change(self):
        """
        Returns last change made on this repository as datetime object,
        built from ``_get_mtime``.
        """
        mtime = self._get_mtime()
        # second item of makedate(); presumably the timezone offset - confirm
        offset = makedate()[1]
        return date_fromtimestamp(mtime, offset)
 

	
 
    def _get_mtime(self):
        """
        Timestamp of the changeset returned by ``get_changeset()``. When
        that lookup raises RepositoryError, the mtime of
        ``.hg/00changelog.i`` is used, or of ``.hg/store`` if that file
        does not exist.
        """
        try:
            return time.mktime(self.get_changeset().date.timetuple())
        except RepositoryError:
            # fallback to filesystem
            changelog = os.path.join(self.path, '.hg', "00changelog.i")
            if not os.path.exists(changelog):
                store = os.path.join(self.path, '.hg', "store")
                return os.stat(store).st_mtime
            return os.stat(changelog).st_mtime
 

	
 
    def _get_revision(self, revision):
        """
        Resolves any revision identifier to its full hex revision hash.

        ``-1`` and ``None`` stand for ``tip``; an ``int`` is used as an
        index into the repository; anything else is resolved as a revision
        symbol.

        :param revision: str or int or None
        :raises EmptyRepositoryError: if the repository has no changesets
        :raises ChangesetDoesNotExistError: if the identifier is unknown
          or ambiguous
        """
        if self._empty:
            raise EmptyRepositoryError("There are no changesets yet")

        if revision in (-1, None):
            revision = b'tip'
        elif isinstance(revision, str):
            revision = safe_bytes(revision)

        try:
            if isinstance(revision, int):
                ctx = self._repo[revision]
            else:
                ctx = mercurial.scmutil.revsymbol(self._repo, revision)
            return ascii_str(ctx.hex())
        except (IndexError, ValueError, mercurial.error.RepoLookupError, TypeError):
            raise ChangesetDoesNotExistError(
                "Revision %r does not exist for %s" % (safe_str(revision), self.name))
        except (LookupError, ):
            # remaining lookup errors are reported as an ambiguous identifier
            raise ChangesetDoesNotExistError(
                "Ambiguous identifier `%s` for %s" % (safe_str(revision), self.name))
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns revision number for the given reference.
 
        """
 
        if ref_type == 'rev' and not ref_name.strip('0'):
 
            return self.EMPTY_CHANGESET
 
        # lookup up the exact node id
 
        _revset_predicates = {
 
                'branch': 'branch',
 
                'book': 'bookmark',
 
                'tag': 'tag',
 
                'rev': 'id',
 
            }
 
        # avoid expensive branch(x) iteration over whole repo
 
        rev_spec = "%%s & %s(%%s)" % _revset_predicates[ref_type]
 
        try:
 
            revs = self._repo.revs(rev_spec, ref_name, ref_name)
 
        except LookupError:
 
            msg = "Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        except mercurial.error.RepoLookupError:
 
            msg = "Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        if revs:
 
            revision = revs.last()
 
        else:
 
            # TODO: just report 'not found'?
 
            revision = ref_name
 

	
 
        return self._get_revision(revision)
 

	
 
    def _get_archives(self, archive_name='tip'):
        """
        Generates a dict with ``type``, ``extension`` and ``node`` for each
        of the zip, gz and bz2 archive kinds that is enabled, either via the
        ``web.allow_archive`` list of ``self.baseui`` or via a
        ``web.allow<kind>`` boolean of the repository ui.

        :param archive_name: value placed under ``node`` in every dict
        """
        allowed = self.baseui.configlist(b"web", b"allow_archive",
                                         untrusted=True)
        archive_kinds = ((b'zip', '.zip'), (b'gz', '.tar.gz'), (b'bz2', '.tar.bz2'))
        for kind, extension in archive_kinds:
            enabled = kind in allowed or self._repo.ui.configbool(
                b"web", b"allow" + kind, untrusted=True)
            if enabled:
                yield {"type": safe_str(kind), "extension": extension, "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, fall back to
 
        filesystem (``file:///``) schema.
 
        """
 
        if url != 'default' and '://' not in url:
 
            url = "file:" + urllib.request.pathname2url(url)
 
        return url
 

	
 
    def get_changeset(self, revision=None):
        """
        Builds the ``MercurialChangeset`` for ``revision``, after resolving
        it with ``_get_revision``.
        """
        raw_id = self._get_revision(revision)
        return MercurialChangeset(repository=self, revision=raw_id)
 

	
 
    def get_changesets(self, start=None, end=None, start_date=None,
                       end_date=None, branch_name=None, reverse=False, max_revisions=None):
        """
        Returns a ``CollectionGenerator`` over the changesets from start to
        end (both are inclusive)

        :param start: None, str, int or mercurial lookup format
        :param end:  None, str, int or mercurial lookup format
        :param start_date: when set, adds a ``date(">...")`` filter
        :param end_date: when set, adds a ``date("<...")`` filter
        :param branch_name: when set, restricts to that branch
        :param reverse: return changesets in reversed order
        :param max_revisions: when set, wraps the query in ``limit(...)``
        :raises RepositoryError: if start comes after end
        :raises BranchDoesNotExistError: if ``branch_name`` is unknown
        """
        start_id = self._get_revision(start)
        start_pos = self.revisions.index(start_id) if start is not None else None
        end_id = self._get_revision(end)
        end_pos = self.revisions.index(end_id) if end is not None else None

        if None not in (start_pos, end_pos) and start_pos > end_pos:
            raise RepositoryError("Start revision '%s' cannot be "
                                  "after end revision '%s'" % (start, end))

        if branch_name and branch_name not in self.allbranches:
            raise BranchDoesNotExistError(
                "Branch %r not found in %s" % (branch_name, self.name))

        # the end is inclusive, so the slice has to stop one later
        stop = None if end_pos is None else end_pos + 1

        # collect revset conditions
        conditions = []
        if branch_name:
            conditions.append(b'branch("%s")' % safe_bytes(branch_name))
        if start_date:
            conditions.append(b'date(">%s")' % safe_bytes(str(start_date)))
        if end_date:
            conditions.append(b'date("<%s")' % safe_bytes(str(end_date)))

        if conditions or max_revisions:
            revspec = b' and '.join(conditions) if conditions else b'all()'
            if max_revisions:
                revspec = b'limit(%s, %d)' % (revspec, max_revisions)
            revisions = mercurial.scmutil.revrange(self._repo, [revspec])
        else:
            revisions = self.revisions

        # this is very much a hack to turn this into a list; a better solution
        # would be to get rid of this function entirely and use revsets
        revs = list(revisions)[start_pos:stop]
        if reverse:
            revs.reverse()

        return CollectionGenerator(self, revs)
 

	
 
    def get_diff_changesets(self, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from this repo @org_rev
        to other_repo @other_rev
        ... and the other way
        ... and the ancestors that would be used for merge

        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want our compare to be made on other_repo
        :returns: tuple ``(other_changesets, org_changesets, ancestors)``;
            both lists are empty and ``ancestors`` is ``None`` when the two
            revisions are equal
        """
        if org_rev == other_rev:
            return [], [], None

        if self != other_repo:
            # case two independent repos
            hgrepo = mercurial.unionrepo.makeunionrepository(other_repo.baseui,
                                                             safe_bytes(other_repo.path),
                                                             safe_bytes(self.path))
            # all ancestors of other_rev will be in other_repo and
            # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot
        else:
            # no remote compare do it on the same repository
            hgrepo = other_repo._repo

        other_id = ascii_bytes(other_rev)
        org_id = ascii_bytes(org_rev)

        def hex_ids(revs):
            # hex ids of the given revision numbers of hgrepo
            return [ascii_str(hgrepo[rev].hex()) for rev in revs]

        ancestors = hex_ids(hgrepo.revs(b"id(%s) & ::id(%s)", other_id, org_id))
        if ancestors:
            log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev)
        else:
            log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev)
            ancestors = hex_ids(
                hgrepo.revs(b"heads(::id(%s) & ::id(%s))", org_id, other_id))  # FIXME: expensive!

        # ancestors of the first id that are not ancestors of (or equal to) the second
        only_in = b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)"
        other_changesets = [
            other_repo.get_changeset(rev)
            for rev in hgrepo.revs(only_in, other_id, org_id, org_id)
        ]
        org_changesets = [
            self.get_changeset(ascii_str(hgrepo[rev].hex()))
            for rev in hgrepo.revs(only_in, org_id, other_id, other_id)
        ]

        return other_changesets, org_changesets, ancestors
 

	
 
    def pull(self, url):
        """
        Tries to pull changes from external location.

        :param url: location to pull from; it is passed through
          ``self._get_url`` before a Mercurial peer is opened for it
        :raises RepositoryError: if Mercurial aborts the pull
        """
        other = mercurial.hg.peer(self._repo, {}, safe_bytes(self._get_url(url)))
        try:
            mercurial.exchange.pull(self._repo, other, heads=None, force=None)
        except mercurial.error.Abort as err:
            # Propagate error but with vcs's type, keeping the Mercurial
            # error attached as the explicit cause
            raise RepositoryError(str(err)) from err
 

	
 
    @LazyProperty
    def workdir(self):
        """
        Gives the ``MercurialWorkdir`` instance created for this repository.
        """
        workdir_instance = MercurialWorkdir(self)
        return workdir_instance
 

	
 
    def get_config_value(self, section, name=None, config_file=None):
        """
        Looks up the value configured for ``name`` in [``section``].

        Without ``config_file`` the ui configuration of the repository is
        consulted. Otherwise a fresh Mercurial ui object is populated from
        the given file(s) and consulted instead.

        :param section: Section we want to retrieve value from
        :param name: Name of configuration we want to retrieve
        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        :returns: the value converted with ``safe_str``, or ``None`` if the
          lookup gave ``None``
        """
        # normalize to a list of paths
        if config_file is None:
            paths = []
        elif isinstance(config_file, str):
            paths = [config_file]
        else:
            paths = config_file

        source = self._repo.ui
        if paths:
            # explicit files requested: read them into a separate ui object
            source = mercurial.ui.ui()
            for cfg_path in paths:
                source.readconfig(safe_bytes(cfg_path))

        raw = source.config(safe_bytes(section), safe_bytes(name))
        if raw is None:
            return raw
        return safe_str(raw)
 

	
 
    def get_user_name(self, config_file=None):
        """
        Returns the name part of the configured ``ui.username``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        :returns: ``author_name`` of the configured value, or ``None`` if
          nothing (or an empty value) is configured
        """
        configured = self.get_config_value('ui', 'username', config_file=config_file)
        return author_name(configured) if configured else None
 

	
 
    def get_user_email(self, config_file=None):
        """
        Returns the email part of the configured ``ui.username``.

        :param config_file: A path to file which should be used to retrieve
          configuration from (might also be a list of file paths)
        :returns: ``author_email`` of the configured value, or ``None`` if
          nothing (or an empty value) is configured
        """
        configured = self.get_config_value('ui', 'username', config_file=config_file)
        return author_email(configured) if configured else None
kallithea/model/pull_request.py
Show inline comments
 
# -*- coding: utf-8 -*-
 
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
 
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
 
#
 
# This program is distributed in the hope that it will be useful,
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
# GNU General Public License for more details.
 
#
 
# You should have received a copy of the GNU General Public License
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
"""
 
kallithea.model.pull_request
 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

	
 
pull request model for Kallithea
 

	
 
This file was forked by the Kallithea project in July 2014.
 
Original author and date, and relevant copyright and licensing information is below:
 
:created_on: Jun 6, 2012
 
:author: marcink
 
:copyright: (c) 2013 RhodeCode GmbH, and others.
 
:license: GPLv3, see LICENSE.md for more details.
 
"""
 

	
 
import datetime
 
import logging
 
import re
 

	
 
from tg import request
 
from tg.i18n import ugettext as _
 

	
 
from kallithea.lib import helpers as h
 
from kallithea.lib.hooks import log_create_pullrequest
 
from kallithea.lib.utils import extract_mentioned_users
 
from kallithea.lib.utils2 import ascii_bytes
 
from kallithea.model.db import ChangesetStatus, PullRequest, PullRequestReviewer, User
 
from kallithea.model.meta import Session
 
from kallithea.model.notification import NotificationModel
 

	
 

	
 
log = logging.getLogger(__name__)
 

	
 

	
 
def _assert_valid_reviewers(seq):
 
    """Sanity check: elements are actual User objects, and not the default user."""
 
    assert not any(user.is_default_user for user in seq)
 

	
 

	
 
class PullRequestModel(object):
    """Operations on existing pull requests: reviewers, mentions, deletion
    and closing."""

    def add_reviewers(self, user, pr, reviewers, mention_recipients=None):
        """Add reviewer and send notification to them.

        Users that already are reviewers of ``pr`` are not added again. The
        remaining reviewers are added and get a pull request notification.
        Users in ``mention_recipients`` that are not also in ``reviewers``
        get the same notification flagged as a mention, but are not added
        as reviewers.

        :param user: user used as creator of the notifications
        :param pr: the pull request to add reviewers to
        :param reviewers: iterable of users to add as reviewers
        :param mention_recipients: optional iterable of users to notify as
          mentioned
        :returns: tuple of two sets: the reviewers that were added, and the
          requested reviewers that already were reviewers
        """
        reviewers = set(reviewers)
        _assert_valid_reviewers(reviewers)
        if mention_recipients is not None:
            mention_recipients = set(mention_recipients) - reviewers
            _assert_valid_reviewers(mention_recipients)

        if reviewers:
            redundant_reviewers = set(User.query()
                .join(PullRequestReviewer)
                .filter(PullRequestReviewer.pull_request == pr)
                .filter(PullRequestReviewer.user_id.in_([r.user_id for r in reviewers]))
                .all())
        else:
            # nothing to look up - avoid SQLAlchemy warning about empty
            # sequence for IN-predicate (like in remove_reviewers)
            redundant_reviewers = set()

        if redundant_reviewers:
            log.debug('Following reviewers were already part of pull request %s: %s', pr.pull_request_id, redundant_reviewers)

            reviewers -= redundant_reviewers

        log.debug('Adding reviewers to pull request %s: %s', pr.pull_request_id, reviewers)
        for reviewer in reviewers:
            prr = PullRequestReviewer(reviewer, pr)
            Session().add(prr)

        # notification to reviewers
        pr_url = pr.url(canonical=True)
        threading = ['%s-pr-%s@%s' % (pr.other_repo.repo_name,
                                      pr.pull_request_id,
                                      h.canonical_hostname())]
        subject = h.link_to(
            _('%(user)s wants you to review pull request %(pr_nice_id)s: %(pr_title)s') %
                {'user': user.username,
                 'pr_title': pr.title,
                 'pr_nice_id': pr.nice_id()},
            pr_url)
        body = pr.description
        _org_ref_type, org_ref_name, _org_rev = pr.org_ref.split(':')
        _other_ref_type, other_ref_name, _other_rev = pr.other_ref.split(':')
        revision_data = [(x.raw_id, x.message)
                         for x in map(pr.org_repo.get_changeset, pr.revisions)]
        email_kwargs = {
            'pr_title': pr.title,
            'pr_title_short': h.shorter(pr.title, 50),
            'pr_user_created': user.full_name_and_username,
            'pr_repo_url': h.canonical_url('summary_home', repo_name=pr.other_repo.repo_name),
            'pr_url': pr_url,
            'pr_revisions': revision_data,
            'repo_name': pr.other_repo.repo_name,
            'org_repo_name': pr.org_repo.repo_name,
            'pr_nice_id': pr.nice_id(),
            'pr_target_repo': h.canonical_url('summary_home',
                               repo_name=pr.other_repo.repo_name),
            'pr_target_branch': other_ref_name,
            'pr_source_repo': h.canonical_url('summary_home',
                               repo_name=pr.org_repo.repo_name),
            'pr_source_branch': org_ref_name,
            'pr_owner': pr.owner,
            'pr_owner_username': pr.owner.username,
            'pr_username': user.username,
            'threading': threading,
            'is_mention': False,
            }
        # only the reviewers that actually were added are notified
        if reviewers:
            NotificationModel().create(created_by=user, subject=subject, body=body,
                                       recipients=reviewers,
                                       type_=NotificationModel.TYPE_PULL_REQUEST,
                                       email_kwargs=email_kwargs)

        if mention_recipients:
            email_kwargs['is_mention'] = True
            subject = _('[Mention]') + ' ' + subject
            # FIXME: this subject is wrong and unused!
            NotificationModel().create(created_by=user, subject=subject, body=body,
                                       recipients=mention_recipients,
                                       type_=NotificationModel.TYPE_PULL_REQUEST,
                                       email_kwargs=email_kwargs)

        return reviewers, redundant_reviewers

    def mention_from_description(self, user, pr, old_description=''):
        """Notify users mentioned in the description of ``pr``.

        Users that also were mentioned in ``old_description`` are not
        notified again. No reviewers are added.

        :param user: user used as creator of the notifications
        :param pr: the pull request with the current description
        :param old_description: previous description text, if any
        """
        mention_recipients = (extract_mentioned_users(pr.description) -
                              extract_mentioned_users(old_description))

        log.debug("Mentioning %s", mention_recipients)
        self.add_reviewers(user, pr, set(), mention_recipients)

    def remove_reviewers(self, user, pull_request, reviewers):
        """Remove specified users from being reviewers of the PR."""
        if not reviewers:
            return # avoid SQLAlchemy warning about empty sequence for IN-predicate

        PullRequestReviewer.query() \
            .filter_by(pull_request=pull_request) \
            .filter(PullRequestReviewer.user_id.in_(r.user_id for r in reviewers)) \
            .delete(synchronize_session='fetch') # the default of 'evaluate' is not available

    def delete(self, pull_request):
        """Delete the pull request through the database session.

        If the org repository is a git repository, its ``refs/pull/<id>/head``
        ref is removed too; a missing ref is ignored.

        :param pull_request: the pull request, as accepted by
          ``PullRequest.guess_instance``
        """
        pull_request = PullRequest.guess_instance(pull_request)
        Session().delete(pull_request)
        if pull_request.org_repo.scm_instance.alias == 'git':
            # remove a ref under refs/pull/ so that commits can be garbage-collected
            try:
                del pull_request.org_repo.scm_instance._repo[b"refs/pull/%d/head" % pull_request.pull_request_id]
            except KeyError:
                pass

    def close_pull_request(self, pull_request):
        """Mark the pull request as closed and update its ``updated_on``
        timestamp to the current time.

        :param pull_request: the pull request, as accepted by
          ``PullRequest.guess_instance``
        """
        pull_request = PullRequest.guess_instance(pull_request)
        pull_request.status = PullRequest.STATUS_CLOSED
        pull_request.updated_on = datetime.datetime.now()
 

	
 

	
 
class CreatePullRequestAction(object):
    """Command object for creating a pull request.

    The parameters are validated and normalized when the object is
    constructed; ``execute`` then creates the pull request.
    """

    class ValidationError(Exception):
        """Base class for errors raised when a pull request cannot be created."""
        pass

    class Empty(ValidationError):
        """There are no changesets to put in the pull request."""
        pass

    class AmbiguousAncestor(ValidationError):
        """More than one ancestor revision was found."""
        pass

    class Unauthorized(ValidationError):
        """The user lacks permission on one of the repositories."""
        pass

    @staticmethod
    def is_user_authorized(org_repo, other_repo):
        """Performs authorization check with only the minimum amount of
        information needed for such a check, rather than a full command
        object.

        :returns: ``True`` if the 'read' permission level check passes for
          both ``org_repo`` and ``other_repo``, else ``False``
        """
        if (h.HasRepoPermissionLevel('read')(org_repo.repo_name) and
            h.HasRepoPermissionLevel('read')(other_repo.repo_name)
        ):
            return True

        return False

    def __init__(self, org_repo, other_repo, org_ref, other_ref, title, description, owner, reviewers):
        """Validate and prepare everything needed for creating the pull request.

        :param org_repo: repository with the changes
        :param other_repo: repository the pull request is made against
        :param org_ref: ``type:name:revision`` reference in ``org_repo``
        :param other_ref: ``type:name:revision`` reference in ``other_repo``
        :param title: title; if empty, one is generated from the refs
        :param description: description; if empty, a default text is used
        :param owner: owner of the new pull request
        :param reviewers: iterable of users to add as reviewers
        :raises Empty: if there are no changesets for the pull request
        :raises AmbiguousAncestor: if more than one ancestor is found
        :raises Unauthorized: if ``is_user_authorized`` fails
        """
        reviewers = set(reviewers)
        _assert_valid_reviewers(reviewers)

        (org_ref_type,
         org_ref_name,
         org_rev) = org_ref.split(':')
        org_display = h.short_ref(org_ref_type, org_ref_name)
        if org_ref_type == 'rev':
            # store a 'rev' reference as a reference to the branch of the changeset
            cs = org_repo.scm_instance.get_changeset(org_rev)
            org_ref = 'branch:%s:%s' % (cs.branch, cs.raw_id)

        (other_ref_type,
         other_ref_name,
         other_rev) = other_ref.split(':')
        if other_ref_type == 'rev':
            # use the short hash as name and the full hash as revision
            cs = other_repo.scm_instance.get_changeset(other_rev)
            other_ref_name = cs.raw_id[:12]
            other_ref = '%s:%s:%s' % (other_ref_type, other_ref_name, cs.raw_id)
        other_display = h.short_ref(other_ref_type, other_ref_name)

        # NOTE(review): the controller call this replaced also passed
        # other_repo.scm_instance, while this call only involves org_repo -
        # confirm that is intended for pull requests between different repos
        cs_ranges, _cs_ranges_not, ancestor_revs = \
            org_repo.scm_instance.get_diff_changesets(other_rev, org_repo.scm_instance, org_rev) # org and other "swapped"
        if not cs_ranges:
            raise self.Empty(_('Cannot create empty pull request'))

        if not ancestor_revs:
            ancestor_rev = org_repo.scm_instance.EMPTY_CHANGESET
        elif len(ancestor_revs) == 1:
            ancestor_rev = ancestor_revs[0]
        else:
            raise self.AmbiguousAncestor(
                _('Cannot create pull request - criss cross merge detected, please merge a later %s revision to %s')
                % (other_ref_name, org_ref_name))

        self.revisions = [cs_.raw_id for cs_ in cs_ranges]

        # hack: ancestor_rev is not an other_rev but we want to show the
        # requested destination and have the exact ancestor
        other_ref = '%s:%s:%s' % (other_ref_type, other_ref_name, ancestor_rev)

        if not title:
            if org_repo == other_repo:
                title = '%s to %s' % (org_display, other_display)
            else:
                title = '%s#%s to %s#%s' % (org_repo.repo_name, org_display,
                                            other_repo.repo_name, other_display)
        description = description or _('No description')

        self.org_repo = org_repo
        self.other_repo = other_repo
        self.org_ref = org_ref
        self.org_rev = org_rev
        self.other_ref = other_ref
        self.title = title
        self.description = description
        self.owner = owner
        self.reviewers = reviewers

        if not CreatePullRequestAction.is_user_authorized(self.org_repo, self.other_repo):
            raise self.Unauthorized(_('You are not authorized to create the pull request'))

    def execute(self):
        """Create the pull request prepared by the constructor.

        The pull request is added to the database session, its status is
        set to under review, reviewers and mentioned users are notified,
        and the creation is logged. For git, a ``refs/pull/<id>/head`` ref
        is created in the org repository.

        :returns: the new ``PullRequest``
        """
        created_by = User.get(request.authuser.user_id)

        pr = PullRequest()
        pr.org_repo = self.org_repo
        pr.org_ref = self.org_ref
        pr.other_repo = self.other_repo
        pr.other_ref = self.other_ref
        pr.revisions = self.revisions
        pr.title = self.title
        pr.description = self.description
        pr.owner = self.owner
        Session().add(pr)
        Session().flush() # make database assign pull_request_id

        if self.org_repo.scm_instance.alias == 'git':
            # create a ref under refs/pull/ so that commits don't get garbage-collected
            self.org_repo.scm_instance._repo[b"refs/pull/%d/head" % pr.pull_request_id] = ascii_bytes(self.org_rev)

        # reset state to under-review
        from kallithea.model.changeset_status import ChangesetStatusModel
        from kallithea.model.comment import ChangesetCommentsModel
        comment = ChangesetCommentsModel().create(
            text='',
            repo=self.org_repo,
            author=created_by,
            pull_request=pr,
            send_email=False,
            status_change=ChangesetStatus.STATUS_UNDER_REVIEW,
        )
        ChangesetStatusModel().set_status(
            self.org_repo,
            ChangesetStatus.STATUS_UNDER_REVIEW,
            created_by,
            comment,
            pull_request=pr,
        )

        mention_recipients = extract_mentioned_users(self.description)
        PullRequestModel().add_reviewers(created_by, pr, self.reviewers, mention_recipients)

        log_create_pullrequest(pr.get_dict(), created_by)

        return pr
 

	
 

	
 
class CreatePullRequestIterationAction(object):
    """Command object for creating a new iteration of an existing pull
    request and closing the old one."""

    @staticmethod
    def is_user_authorized(old_pull_request):
        """Performs authorization check with only the minimum amount of
        information needed for such a check, rather than a full command
        object.

        :returns: ``True`` for 'hg.admin' users, and for the owner of
          ``old_pull_request`` if also authorized to create a pull request
          between its repositories; else ``False``
        """
        if h.HasPermissionAny('hg.admin')():
            return True

        # Must be authorized to edit the old PR, and to create a new PR
        is_owner = request.authuser.user_id == old_pull_request.owner_id
        return is_owner and CreatePullRequestAction.is_user_authorized(
            old_pull_request.org_repo, old_pull_request.other_repo)

    def __init__(self, old_pull_request, new_org_rev, new_other_rev, title, description, owner, reviewers):
        """Prepare a new iteration of ``old_pull_request``.

        A ``CreatePullRequestAction`` is set up with the refs of the old
        pull request pointing at the new revisions. Its title gets a
        ``(vN)`` suffix, and a summary of what changed since the previous
        iteration is inserted in the description.

        :param old_pull_request: the pull request to make an iteration of
        :param new_org_rev: new revision for the org ref
        :param new_other_rev: new revision for the other ref
        :param title: title to derive the versioned title from
        :param description: description to insert iteration info in
        :param owner: owner of the new pull request
        :param reviewers: iterable of users to add as reviewers
        :raises CreatePullRequestAction.Unauthorized: if
          ``is_user_authorized`` fails
        """
        self.old_pull_request = old_pull_request

        org_repo = old_pull_request.org_repo
        other_repo = old_pull_request.other_repo
        org_ref_type, org_ref_name, org_rev = old_pull_request.org_ref.split(':')
        # the revision part of the old other_ref is the ancestor
        other_ref_type, other_ref_name, _ancestor_rev = old_pull_request.other_ref.split(':')
        #assert other_ref_type == 'branch', other_ref_type # TODO: what if not?

        new_org_ref = '%s:%s:%s' % (org_ref_type, org_ref_name, new_org_rev)
        new_other_ref = '%s:%s:%s' % (other_ref_type, other_ref_name, new_other_rev)

        self.create_action = CreatePullRequestAction(org_repo, other_repo, new_org_ref, new_other_ref, None, None, owner, reviewers)

        # Generate complete title/description

        def first_line(raw_id):
            # first line of the commit message of the given changeset
            return org_repo.get_changeset(raw_id).message.split('\n')[0]

        previous_revs = set(old_pull_request.revisions)
        current_revs = self.create_action.revisions
        added_revs = [r for r in current_revs if r not in previous_revs]
        dropped_revs = previous_revs.difference(current_revs)

        old_url = h.canonical_url('pullrequest_show', repo_name=old_pull_request.other_repo.repo_name,
                                  pull_request_id=old_pull_request.pull_request_id)
        infos = ['This is a new iteration of %s "%s".' % (old_url, old_pull_request.title)]

        if dropped_revs:
            infos.append(_('Missing changesets since the previous iteration:'))
            for r in old_pull_request.revisions:
                if r not in dropped_revs:
                    continue
                summary = first_line(r)
                infos.append('  %s %s' % (h.short_id(r), summary))

        if not added_revs:
            infos.append(_('No changes found on %s %s since previous iteration.') % (org_ref_type, org_ref_name))
            # TODO: fail?
        else:
            infos.append(_('New changesets on %s %s since the previous iteration:') % (org_ref_type, org_ref_name))
            for r in reversed(current_revs):
                if r not in added_revs:
                    continue
                summary = first_line(r)
                infos.append('  %s %s' % (h.short_id(r), h.shorter(summary, 80)))

            if self.create_action.other_ref != old_pull_request.other_ref:
                infos.append(_('This iteration is based on another %s revision and there is no simple diff.') % other_ref_name)
            else:
                infos.append(_("Ancestor didn't change - diff since previous iteration:"))
                compare_link = h.canonical_url(
                    'compare_url',
                    repo_name=org_repo.repo_name, # other_repo is always same as repo_name
                    org_ref_type='rev', org_ref_name=h.short_id(org_rev), # use old org_rev as base
                    other_ref_type='rev', other_ref_name=h.short_id(new_org_rev),
                ) # note: linear diff, merge or not doesn't matter
                infos.append(compare_link)

        # bump an existing "(vN)" suffix of the title, or start at v2
        versioned = re.match(r'(.*)\(v(\d+)\)\s*$', title)
        if versioned is None:
            base_title, iteration = title, 2
        else:
            base_title, iteration = versioned.group(1), int(versioned.group(2)) + 1
        self.create_action.title = '%s (v%s)' % (base_title.strip(), iteration)

        # using a mail-like separator, insert new iteration info in description with latest first
        head, *older = description.replace('\r\n', '\n').split('\n-- \n', 1)
        sections = [head.strip() + '\n\n-- \n' + '\n'.join(infos)]
        if older:
            sections.append(older[0].strip())
        self.create_action.description = '\n\n'.join(sections)

        if not CreatePullRequestIterationAction.is_user_authorized(self.old_pull_request):
            raise CreatePullRequestAction.Unauthorized(_('You are not authorized to create the pull request'))

    def execute(self):
        """Create the new iteration and close the old pull request.

        The old pull request gets a closing comment with a link to the new
        iteration.

        :returns: the new pull request
        """
        pull_request = self.create_action.execute()

        # Close old iteration
        from kallithea.model.comment import ChangesetCommentsModel
        previous = self.old_pull_request
        closing_text = _('Closed, next iteration: %s .') % pull_request.url(canonical=True)
        ChangesetCommentsModel().create(
            text=closing_text,
            repo=previous.other_repo_id,
            author=request.authuser.user_id,
            pull_request=previous.pull_request_id,
            closing_pr=True)
        PullRequestModel().close_pull_request(self.old_pull_request.pull_request_id)
        return pull_request
0 comments (0 inline, 0 general)