Changeset - 911669ac48a5
[Not reviewed]
default
0 4 0
Mads Kiilerich (mads) - 5 years ago 2020-11-12 18:56:53
mads@kiilerich.com
Grafted from: 4802bd37c2e9
vcs: drop repo contact ... and the odd uses of it

A half-baked and essentially unused hgweb feature.

The reference in get_commits_stats doesn't seem reachable in any relevant use
cases.
4 files changed with 0 insertions and 19 deletions:
0 comments (0 inline, 0 general)
kallithea/lib/indexers/daemon.py
Show inline comments
 
@@ -102,232 +102,230 @@ class WhooshIndexingDaemon(object):
 
        elif not exists_in(self.index_location, IDX_NAME):
 
            log.info('Running full index build, as the file content '
 
                     'index does not exist')
 
        elif not exists_in(self.index_location, CHGSET_IDX_NAME):
 
            log.info('Running full index build, as the changeset '
 
                     'index does not exist')
 
        else:
 
            self.initial = False
 

	
 
    def _get_index_revision(self, repo):
 
        db_repo = db.Repository.get_by_repo_name(repo.name)
 
        landing_rev = 'tip'
 
        if db_repo:
 
            _rev_type, _rev = db_repo.landing_rev
 
            landing_rev = _rev
 
        return landing_rev
 

	
 
    def _get_index_changeset(self, repo, index_rev=None):
 
        if not index_rev:
 
            index_rev = self._get_index_revision(repo)
 
        cs = repo.get_changeset(index_rev)
 
        return cs
 

	
 
    def get_paths(self, repo):
 
        """
 
        recursive walk in root dir and return a set of all path in that dir
 
        based on repository walk function
 
        """
 
        index_paths_ = set()
 
        try:
 
            cs = self._get_index_changeset(repo)
 
            for _topnode, _dirs, files in cs.walk('/'):
 
                for f in files:
 
                    index_paths_.add(os.path.join(repo.path, f.path))
 

	
 
        except RepositoryError:
 
            log.debug(traceback.format_exc())
 
            pass
 
        return index_paths_
 

	
 
    def get_node(self, repo, path, index_rev=None):
 
        """
 
        gets a filenode based on given full path.
 

	
 
        :param repo: scm repo instance
 
        :param path: full path including root location
 
        :return: FileNode
 
        """
 
        # FIXME: paths should be normalized ... or even better: don't include repo.path
 
        assert path.startswith(repo.path)
 
        assert path[len(repo.path)] in (os.path.sep, os.path.altsep)
 
        node_path = path[len(repo.path) + 1:]
 
        cs = self._get_index_changeset(repo, index_rev=index_rev)
 
        node = cs.get_node(node_path)
 
        return node
 

	
 
    def is_indexable_node(self, node):
 
        """
 
        Just index the content of chosen files, skipping binary files
 
        """
 
        return (node.extension in INDEX_EXTENSIONS or node.name in INDEX_FILENAMES) and \
 
               not node.is_binary
 

	
 
    def get_node_mtime(self, node):
 
        return mktime(node.last_changeset.date.timetuple())
 

	
 
    def add_doc(self, writer, path, repo, repo_name, index_rev=None):
 
        """
 
        Adding doc to writer this function itself fetches data from
 
        the instance of vcs backend
 
        """
 
        try:
 
            node = self.get_node(repo, path, index_rev)
 
        except (ChangesetError, NodeDoesNotExistError):
 
            log.debug("    >> %s - not found in %s %s", path, repo, index_rev)
 
            return 0, 0
 

	
 
        indexed = indexed_w_content = 0
 
        if self.is_indexable_node(node):
 
            bytes_content = node.content
 
            if b'\0' in bytes_content:
 
                log.warning('    >> %s - no text content', path)
 
                u_content = ''
 
            else:
 
                log.debug('    >> %s', path)
 
                u_content = safe_str(bytes_content)
 
                indexed_w_content += 1
 

	
 
        else:
 
            log.debug('    >> %s - not indexable', path)
 
            # just index file name without it's content
 
            u_content = ''
 
            indexed += 1
 

	
 
        writer.add_document(
 
            fileid=path,
 
            owner=repo.contact,
 
            repository_rawname=repo_name,
 
            repository=repo_name,
 
            path=path,
 
            content=u_content,
 
            modtime=self.get_node_mtime(node),
 
            extension=node.extension
 
        )
 
        return indexed, indexed_w_content
 

	
 
    def index_changesets(self, writer, repo_name, repo, start_rev=None):
 
        """
 
        Add all changeset in the vcs repo starting at start_rev
 
        to the index writer
 

	
 
        :param writer: the whoosh index writer to add to
 
        :param repo_name: name of the repository from whence the
 
          changeset originates including the repository group
 
        :param repo: the vcs repository instance to index changesets for,
 
          the presumption is the repo has changesets to index
 
        :param start_rev=None: the full sha id to start indexing from
 
          if start_rev is None then index from the first changeset in
 
          the repo
 
        """
 

	
 
        if start_rev is None:
 
            start_rev = repo[0].raw_id
 

	
 
        log.debug('Indexing changesets in %s, starting at rev %s',
 
                  repo_name, start_rev)
 

	
 
        indexed = 0
 
        cs_iter = repo.get_changesets(start=start_rev)
 
        total = len(cs_iter)
 
        for cs in cs_iter:
 
            indexed += 1
 
            log.debug('    >> %s %s/%s', cs, indexed, total)
 
            writer.add_document(
 
                raw_id=cs.raw_id,
 
                owner=repo.contact,
 
                date=cs._timestamp,
 
                repository_rawname=repo_name,
 
                repository=repo_name,
 
                author=cs.author,
 
                message=cs.message,
 
                last=cs.last,
 
                added=' '.join(node.path for node in cs.added).lower(),
 
                removed=' '.join(node.path for node in cs.removed).lower(),
 
                changed=' '.join(node.path for node in cs.changed).lower(),
 
                parents=' '.join(cs.raw_id for cs in cs.parents),
 
            )
 

	
 
        return indexed
 

	
 
    def index_files(self, file_idx_writer, repo_name, repo):
 
        """
 
        Index files for given repo_name
 

	
 
        :param file_idx_writer: the whoosh index writer to add to
 
        :param repo_name: name of the repository we're indexing
 
        :param repo: instance of vcs repo
 
        """
 
        i_cnt = iwc_cnt = 0
 
        log.debug('Building file index for %s @revision:%s', repo_name,
 
                                                self._get_index_revision(repo))
 
        index_rev = self._get_index_revision(repo)
 
        for idx_path in self.get_paths(repo):
 
            i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name, index_rev)
 
            i_cnt += i
 
            iwc_cnt += iwc
 

	
 
        log.debug('added %s files %s with content for repo %s',
 
                  i_cnt + iwc_cnt, iwc_cnt, repo.path)
 
        return i_cnt, iwc_cnt
 

	
 
    def update_changeset_index(self):
 
        idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
 

	
 
        with idx.searcher() as searcher:
 
            writer = idx.writer()
 
            writer_is_dirty = False
 
            try:
 
                indexed_total = 0
 
                repo_name = None
 
                for repo_name, repo in sorted(self.repo_paths.items()):
 
                    log.debug('Updating changeset index for repo %s', repo_name)
 
                    # skip indexing if there aren't any revs in the repo
 
                    num_of_revs = len(repo)
 
                    if num_of_revs < 1:
 
                        continue
 

	
 
                    qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
 
                    q = qp.parse("last:t AND %s" % repo_name)
 

	
 
                    results = searcher.search(q)
 

	
 
                    # default to scanning the entire repo
 
                    last_rev = 0
 
                    start_id = None
 

	
 
                    if len(results) > 0:
 
                        # assuming that there is only one result, if not this
 
                        # may require a full re-index.
 
                        start_id = results[0]['raw_id']
 
                        try:
 
                            last_rev = repo.get_changeset(revision=start_id).revision
 
                        except ChangesetDoesNotExistError:
 
                            log.error('previous last revision %s not found - indexing from scratch', start_id)
 
                            start_id = None
 

	
 
                    # there are new changesets to index or a new repo to index
 
                    if last_rev == 0 or num_of_revs > last_rev + 1:
 
                        # delete the docs in the index for the previous
 
                        # last changeset(s)
 
                        for hit in results:
 
                            q = qp.parse("last:t AND %s AND raw_id:%s" %
 
                                            (repo_name, hit['raw_id']))
 
                            writer.delete_by_query(q)
 

	
 
                        # index from the previous last changeset + all new ones
 
                        indexed_total += self.index_changesets(writer,
 
                                                repo_name, repo, start_id)
 
                        writer_is_dirty = True
 
                log.debug('indexed %s changesets for repo %s',
 
                          indexed_total, repo_name
 
                )
 
            finally:
 
                if writer_is_dirty:
 
                    log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
 
                    writer.commit(merge=True)
 
                    log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
 
                else:
 
                    log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
 

	
 
    def update_file_index(self):
 
        log.debug('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
kallithea/lib/vcs/backends/git/repository.py
Show inline comments
 
@@ -295,197 +295,192 @@ class GitRepository(BaseRepository):
 
        includes = [ascii_str(sha) for key, (sha, type_) in self._parsed_refs.items()
 
                    if type_ != b'T']
 
        return [c.commit.id for c in self._repo.get_walker(include=includes)]
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Given any revision identifier, returns a 40 char string with revision hash.
 
        """
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in (None, '', 'tip', 'HEAD', 'head', -1):
 
            revision = -1
 

	
 
        if isinstance(revision, int):
 
            try:
 
                return self.revisions[revision]
 
            except IndexError:
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        if isinstance(revision, str):
 
            if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
 
                try:
 
                    return self.revisions[int(revision)]
 
                except IndexError:
 
                    msg = "Revision %r does not exist for %s" % (revision, self)
 
                    raise ChangesetDoesNotExistError(msg)
 

	
 
            # get by branch/tag name
 
            _ref_revision = self._parsed_refs.get(safe_bytes(revision))
 
            if _ref_revision:  # and _ref_revision[1] in [b'H', b'RH', b'T']:
 
                return ascii_str(_ref_revision[0])
 

	
 
            if revision in self.revisions:
 
                return revision
 

	
 
            # maybe it's a tag ? we don't have them in self.revisions
 
            if revision in self.tags.values():
 
                return revision
 

	
 
            if SHA_PATTERN.match(revision):
 
                msg = "Revision %r does not exist for %s" % (revision, self.name)
 
                raise ChangesetDoesNotExistError(msg)
 

	
 
        raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns ``GitChangeset`` object representing repository's
 
        changeset at the given ``revision``.
 
        """
 
        return self._get_revision(ref_name)
 

	
 
    def _get_archives(self, archive_name='tip'):
 

	
 
        for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
 
            yield {"type": i[0], "extension": i[1], "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, would fall to
 
        filesystem (``file:///``) schema.
 
        """
 
        if url != 'default' and '://' not in url:
 
            url = ':///'.join(('file', url))
 
        return url
 

	
 
    @LazyProperty
 
    def name(self):
 
        return os.path.basename(self.path)
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            idx_loc = '' if self.bare else '.git'
 
            # fallback to filesystem
 
            in_path = os.path.join(self.path, idx_loc, "index")
 
            he_path = os.path.join(self.path, idx_loc, "HEAD")
 
            if os.path.exists(in_path):
 
                return os.stat(in_path).st_mtime
 
            else:
 
                return os.stat(he_path).st_mtime
 

	
 
    @LazyProperty
 
    def description(self):
 
        return safe_str(self._repo.get_description() or b'unknown')
 

	
 
    @LazyProperty
 
    def contact(self):
 
        undefined_contact = 'Unknown'
 
        return undefined_contact
 

	
 
    @property
 
    def branches(self):
 
        if not self.revisions:
 
            return {}
 
        _branches = [(safe_str(key), ascii_str(sha))
 
                     for key, (sha, type_) in self._parsed_refs.items() if type_ == b'H']
 
        return OrderedDict(sorted(_branches, key=(lambda ctx: ctx[0]), reverse=False))
 

	
 
    @LazyProperty
 
    def closed_branches(self):
 
        return {}
 

	
 
    @LazyProperty
 
    def tags(self):
 
        return self._get_tags()
 

	
 
    def _get_tags(self):
 
        if not self.revisions:
 
            return {}
 
        _tags = [(safe_str(key), ascii_str(sha))
 
                 for key, (sha, type_) in self._parsed_refs.items() if type_ == b'T']
 
        return OrderedDict(sorted(_tags, key=(lambda ctx: ctx[0]), reverse=True))
 

	
 
    def tag(self, name, user, revision=None, message=None, date=None,
 
            **kwargs):
 
        """
 
        Creates and returns a tag for the given ``revision``.
 

	
 
        :param name: name for new tag
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param revision: changeset id for which new tag would be created
 
        :param message: message of the tag's commit
 
        :param date: date of tag's commit
 

	
 
        :raises TagAlreadyExistError: if tag with same name already exists
 
        """
 
        if name in self.tags:
 
            raise TagAlreadyExistError("Tag %s already exists" % name)
 
        changeset = self.get_changeset(revision)
 
        message = message or "Added tag %s for commit %s" % (name,
 
            changeset.raw_id)
 
        self._repo.refs[b"refs/tags/%s" % safe_bytes(name)] = changeset._commit.id
 

	
 
        self._parsed_refs = self._get_parsed_refs()
 
        self.tags = self._get_tags()
 
        return changeset
 

	
 
    def remove_tag(self, name, user, message=None, date=None):
 
        """
 
        Removes tag with the given ``name``.
 

	
 
        :param name: name of the tag to be removed
 
        :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
 
        :param message: message of the tag's removal commit
 
        :param date: date of tag's removal commit
 

	
 
        :raises TagDoesNotExistError: if tag with given name does not exists
 
        """
 
        if name not in self.tags:
 
            raise TagDoesNotExistError("Tag %s does not exist" % name)
 
        # self._repo.refs is a DiskRefsContainer, and .path gives the full absolute path of '.git'
 
        tagpath = os.path.join(safe_str(self._repo.refs.path), 'refs', 'tags', name)
 
        try:
 
            os.remove(tagpath)
 
            self._parsed_refs = self._get_parsed_refs()
 
            self.tags = self._get_tags()
 
        except OSError as e:
 
            raise RepositoryError(e.strerror)
 

	
 
    @LazyProperty
 
    def bookmarks(self):
 
        """
 
        Gets bookmarks for this repository
 
        """
 
        return {}
 

	
 
    @LazyProperty
 
    def _parsed_refs(self):
 
        return self._get_parsed_refs()
 

	
 
    def _get_parsed_refs(self):
 
        """Return refs as a dict, like:
 
        { b'v0.2.0': [b'599ba911aa24d2981225f3966eb659dfae9e9f30', b'T'] }
 
        """
 
        _repo = self._repo
 
        refs = _repo.get_refs()
 
        keys = [(b'refs/heads/', b'H'),
 
                (b'refs/remotes/origin/', b'RH'),
 
                (b'refs/tags/', b'T')]
 
        _refs = {}
 
        for ref, sha in refs.items():
 
            for k, type_ in keys:
 
                if ref.startswith(k):
 
                    _key = ref[len(k):]
 
                    if type_ == b'T':
 
                        obj = _repo.get_object(sha)
kallithea/lib/vcs/backends/hg/repository.py
Show inline comments
 
@@ -320,197 +320,192 @@ class MercurialRepository(BaseRepository
 
        # check first if it's not an local url
 
        if os.path.isabs(url) and os.path.isdir(url) or parsed_url.scheme == 'file':
 
            # When creating repos, _get_url will use file protocol for local paths
 
            return
 

	
 
        if parsed_url.scheme not in ['http', 'https', 'ssh', 'git+http', 'git+https']:
 
            raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
 

	
 
        url = safe_bytes(url)
 

	
 
        if parsed_url.scheme == 'ssh':
 
            # in case of invalid uri or authentication issues, sshpeer will
 
            # throw an exception.
 
            mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
 
            return
 

	
 
        if '+' in parsed_url.scheme:  # strip 'git+' for hg-git URLs
 
            url = url.split(b'+', 1)[1]
 

	
 
        url_obj = mercurial.util.url(url)
 
        test_uri, handlers = get_urllib_request_handlers(url_obj)
 

	
 
        url_obj.passwd = b'*****'
 
        cleaned_uri = str(url_obj)
 

	
 
        o = urllib.request.build_opener(*handlers)
 
        o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
 
                        ('Accept', 'application/mercurial-0.1')]
 

	
 
        req = urllib.request.Request(
 
            "%s?%s" % (
 
                safe_str(test_uri),
 
                urllib.parse.urlencode({
 
                    'cmd': 'between',
 
                    'pairs': "%s-%s" % ('0' * 40, '0' * 40),
 
                })
 
            ))
 

	
 
        try:
 
            resp = o.open(req)
 
            if resp.code != 200:
 
                raise Exception('Return Code is not 200')
 
        except Exception as e:
 
            # means it cannot be cloned
 
            raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
 

	
 
        if parsed_url.scheme in ['http', 'https']:  # skip git+http://... etc
 
            # now check if it's a proper hg repo
 
            try:
 
                mercurial.httppeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
 
            except Exception as e:
 
                raise urllib.error.URLError(
 
                    "url [%s] does not look like an hg repo org_exc: %s"
 
                    % (cleaned_uri, e))
 

	
 
    def _get_repo(self, create, src_url=None, update_after_clone=False):
 
        """
 
        Function will check for mercurial repository in given path and return
 
        a localrepo object. If there is no repository in that path it will
 
        raise an exception unless ``create`` parameter is set to True - in
 
        that case repository would be created and returned.
 
        If ``src_url`` is given, would try to clone repository from the
 
        location at given clone_point. Additionally it'll make update to
 
        working copy accordingly to ``update_after_clone`` flag
 
        """
 
        try:
 
            if src_url:
 
                url = self._get_url(src_url)
 
                opts = {}
 
                if not update_after_clone:
 
                    opts.update({'noupdate': True})
 
                MercurialRepository._check_url(url, self.baseui)
 
                mercurial.commands.clone(self.baseui, safe_bytes(url), safe_bytes(self.path), **opts)
 

	
 
                # Don't try to create if we've already cloned repo
 
                create = False
 
            return mercurial.localrepo.instance(self.baseui, safe_bytes(self.path), create=create)
 
        except (mercurial.error.Abort, mercurial.error.RepoError) as err:
 
            if create:
 
                msg = "Cannot create repository at %s. Original error was %s" \
 
                    % (self.name, err)
 
            else:
 
                msg = "Not valid repository at %s. Original error was %s" \
 
                    % (self.name, err)
 
            raise RepositoryError(msg)
 

	
 
    @LazyProperty
 
    def in_memory_changeset(self):
 
        return inmemory.MercurialInMemoryChangeset(self)
 

	
 
    @LazyProperty
 
    def description(self):
 
        _desc = self._repo.ui.config(b'web', b'description', None, untrusted=True)
 
        return safe_str(_desc or b'unknown')
 

	
 
    @LazyProperty
 
    def contact(self):
 
        return safe_str(mercurial.hgweb.common.get_contact(self._repo.ui.config)
 
                            or b'Unknown')
 

	
 
    @LazyProperty
 
    def last_change(self):
 
        """
 
        Returns last change made on this repository as datetime object
 
        """
 
        return date_fromtimestamp(self._get_mtime(), makedate()[1])
 

	
 
    def _get_mtime(self):
 
        try:
 
            return time.mktime(self.get_changeset().date.timetuple())
 
        except RepositoryError:
 
            # fallback to filesystem
 
            cl_path = os.path.join(self.path, '.hg', "00changelog.i")
 
            st_path = os.path.join(self.path, '.hg', "store")
 
            if os.path.exists(cl_path):
 
                return os.stat(cl_path).st_mtime
 
            else:
 
                return os.stat(st_path).st_mtime
 

	
 
    def _get_revision(self, revision):
 
        """
 
        Given any revision identifier, returns a 40 char string with revision hash.
 

	
 
        :param revision: str or int or None
 
        """
 
        if self._empty:
 
            raise EmptyRepositoryError("There are no changesets yet")
 

	
 
        if revision in [-1, None]:
 
            revision = b'tip'
 
        elif isinstance(revision, str):
 
            revision = safe_bytes(revision)
 

	
 
        try:
 
            if isinstance(revision, int):
 
                return ascii_str(self._repo[revision].hex())
 
            return ascii_str(mercurial.scmutil.revsymbol(self._repo, revision).hex())
 
        except (IndexError, ValueError, mercurial.error.RepoLookupError, TypeError):
 
            msg = "Revision %r does not exist for %s" % (safe_str(revision), self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        except (LookupError, ):
 
            msg = "Ambiguous identifier `%s` for %s" % (safe_str(revision), self.name)
 
            raise ChangesetDoesNotExistError(msg)
 

	
 
    def get_ref_revision(self, ref_type, ref_name):
 
        """
 
        Returns revision number for the given reference.
 
        """
 
        if ref_type == 'rev' and not ref_name.strip('0'):
 
            return self.EMPTY_CHANGESET
 
        # lookup up the exact node id
 
        _revset_predicates = {
 
                'branch': 'branch',
 
                'book': 'bookmark',
 
                'tag': 'tag',
 
                'rev': 'id',
 
            }
 
        # avoid expensive branch(x) iteration over whole repo
 
        rev_spec = "%%s & %s(%%s)" % _revset_predicates[ref_type]
 
        try:
 
            revs = self._repo.revs(rev_spec, ref_name, ref_name)
 
        except LookupError:
 
            msg = "Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        except mercurial.error.RepoLookupError:
 
            msg = "Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name)
 
            raise ChangesetDoesNotExistError(msg)
 
        if revs:
 
            revision = revs.last()
 
        else:
 
            # TODO: just report 'not found'?
 
            revision = ref_name
 

	
 
        return self._get_revision(revision)
 

	
 
    def _get_archives(self, archive_name='tip'):
 
        allowed = self.baseui.configlist(b"web", b"allow_archive",
 
                                         untrusted=True)
 
        for name, ext in [(b'zip', '.zip'), (b'gz', '.tar.gz'), (b'bz2', '.tar.bz2')]:
 
            if name in allowed or self._repo.ui.configbool(b"web",
 
                                                           b"allow" + name,
 
                                                           untrusted=True):
 
                yield {"type": safe_str(name), "extension": ext, "node": archive_name}
 

	
 
    def _get_url(self, url):
 
        """
 
        Returns normalized url. If schema is not given, fall back to
 
        filesystem (``file:///``) schema.
 
        """
 
        if url != 'default' and '://' not in url:
 
            url = "file:" + urllib.request.pathname2url(url)
 
        return url
 

	
 
    def get_changeset(self, revision=None):
 
        """
 
        Returns ``MercurialChangeset`` object representing repository's
 
        changeset at the given ``revision``.
kallithea/model/async_tasks.py
Show inline comments
 
@@ -93,199 +93,192 @@ def get_commits_stats(repo_name, ts_min_
 
        scm_repo = db_repo.scm_instance
 
        repo_size = scm_repo.count()
 
        # return if repo have no revisions
 
        if repo_size < 1:
 
            lock.release()
 
            return True
 

	
 
        skip_date_limit = True
 
        parse_limit = int(config.get('commit_parse_limit'))
 
        last_rev = None
 
        last_cs = None
 
        timegetter = itemgetter('time')
 

	
 
        dbrepo = DBS.query(db.Repository) \
 
            .filter(db.Repository.repo_name == repo_name).scalar()
 
        cur_stats = DBS.query(db.Statistics) \
 
            .filter(db.Statistics.repository == dbrepo).scalar()
 

	
 
        if cur_stats is not None:
 
            last_rev = cur_stats.stat_on_revision
 

	
 
        if last_rev == scm_repo.get_changeset().revision and repo_size > 1:
 
            # pass silently without any work if we're not on first revision or
 
            # current state of parsing revision(from db marker) is the
 
            # last revision
 
            lock.release()
 
            return True
 

	
 
        if cur_stats:
 
            commits_by_day_aggregate = OrderedDict(ext_json.loads(
 
                                        cur_stats.commit_activity_combined))
 
            co_day_auth_aggr = ext_json.loads(cur_stats.commit_activity)
 

	
 
        log.debug('starting parsing %s', parse_limit)
 

	
 
        last_rev = last_rev + 1 if last_rev and last_rev >= 0 else 0
 
        log.debug('Getting revisions from %s to %s',
 
             last_rev, last_rev + parse_limit
 
        )
 
        for cs in scm_repo[last_rev:last_rev + parse_limit]:
 
            log.debug('parsing %s', cs)
 
            last_cs = cs  # remember last parsed changeset
 
            tt = cs.date.timetuple()
 
            k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))
 

	
 
            if akc(cs.author) in co_day_auth_aggr:
 
                try:
 
                    l = [timegetter(x) for x in
 
                         co_day_auth_aggr[akc(cs.author)]['data']]
 
                    time_pos = l.index(k)
 
                except ValueError:
 
                    time_pos = None
 

	
 
                if time_pos is not None and time_pos >= 0:
 
                    datadict = \
 
                        co_day_auth_aggr[akc(cs.author)]['data'][time_pos]
 

	
 
                    datadict["commits"] += 1
 
                    datadict["added"] += len(cs.added)
 
                    datadict["changed"] += len(cs.changed)
 
                    datadict["removed"] += len(cs.removed)
 

	
 
                else:
 
                    if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 

	
 
                        datadict = {"time": k,
 
                                    "commits": 1,
 
                                    "added": len(cs.added),
 
                                    "changed": len(cs.changed),
 
                                    "removed": len(cs.removed),
 
                                   }
 
                        co_day_auth_aggr[akc(cs.author)]['data'] \
 
                            .append(datadict)
 

	
 
            else:
 
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
 
                    co_day_auth_aggr[akc(cs.author)] = {
 
                                        "label": akc(cs.author),
 
                                        "data": [{"time": k,
 
                                                 "commits": 1,
 
                                                 "added": len(cs.added),
 
                                                 "changed": len(cs.changed),
 
                                                 "removed": len(cs.removed),
 
                                                 }],
 
                                        "schema": ["commits"],
 
                                        }
 

	
 
            # gather all data by day
 
            if k in commits_by_day_aggregate:
 
                commits_by_day_aggregate[k] += 1
 
            else:
 
                commits_by_day_aggregate[k] = 1
 

	
 
        overview_data = sorted(commits_by_day_aggregate.items(),
 
                               key=itemgetter(0))
 

	
 
        if not co_day_auth_aggr:
 
            co_day_auth_aggr[akc(scm_repo.contact)] = {
 
                "label": akc(scm_repo.contact),
 
                "data": [0, 1],
 
                "schema": ["commits"],
 
            }
 

	
 
        stats = cur_stats if cur_stats else db.Statistics()
 
        stats.commit_activity = ascii_bytes(ext_json.dumps(co_day_auth_aggr))
 
        stats.commit_activity_combined = ascii_bytes(ext_json.dumps(overview_data))
 

	
 
        log.debug('last revision %s', last_rev)
 
        leftovers = len(scm_repo.revisions[last_rev:])
 
        log.debug('revisions to parse %s', leftovers)
 

	
 
        if last_rev == 0 or leftovers < parse_limit:
 
            log.debug('getting code trending stats')
 
            stats.languages = ascii_bytes(ext_json.dumps(__get_codes_stats(repo_name)))
 

	
 
        try:
 
            stats.repository = dbrepo
 
            stats.stat_on_revision = last_cs.revision if last_cs else 0
 
            DBS.add(stats)
 
            DBS.commit()
 
        except:
 
            log.error(traceback.format_exc())
 
            DBS.rollback()
 
            lock.release()
 
            return False
 

	
 
        # final release
 
        lock.release()
 

	
 
        # execute another task if celery is enabled
 
        if len(scm_repo.revisions) > 1 and kallithea.CELERY_APP and recurse_limit > 0:
 
            get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit - 1)
 
        elif recurse_limit <= 0:
 
            log.debug('Not recursing - limit has been reached')
 
        else:
 
            log.debug('Not recursing')
 
    except celerylib.LockHeld:
 
        log.info('Task with key %s already running', lockkey)
 
        return 'Task with key %s already running' % lockkey
 

	
 

	
 
@celerylib.task
 
@celerylib.dbsession
 
def send_email(recipients, subject, body='', html_body='', headers=None, from_name=None):
 
    """
 
    Sends an email with defined parameters from the .ini files.
 

	
 
    :param recipients: list of recipients, if this is None, the defined email
 
        address from field 'email_to' and all admins is used instead
 
    :param subject: subject of the mail
 
    :param body: plain text body of the mail
 
    :param html_body: html version of body
 
    :param headers: dictionary of prepopulated e-mail headers
 
    :param from_name: full name to be used as sender of this mail - often a
 
    .full_name_or_username value
 
    """
 
    assert isinstance(recipients, list), recipients
 
    if headers is None:
 
        headers = {}
 
    else:
 
        # do not modify the original headers object passed by the caller
 
        headers = headers.copy()
 

	
 
    email_config = config
 
    email_prefix = email_config.get('email_prefix', '')
 
    if email_prefix:
 
        subject = "%s %s" % (email_prefix, subject)
 

	
 
    if not recipients:
 
        # if recipients are not defined we send to email_config + all admins
 
        recipients = [u.email for u in db.User.query()
 
                      .filter(db.User.admin == True).all()]
 
        if email_config.get('email_to') is not None:
 
            recipients += email_config.get('email_to').split(',')
 

	
 
        # If there are still no recipients, there are no admins and no address
 
        # configured in email_to, so return.
 
        if not recipients:
 
            log.error("No recipients specified and no fallback available.")
 
            return False
 

	
 
        log.warning("No recipients specified for '%s' - sending to admins %s", subject, ' '.join(recipients))
 

	
 
    # SMTP sender
 
    app_email_from = email_config.get('app_email_from', 'Kallithea')
 
    # 'From' header
 
    if from_name is not None:
 
        # set From header based on from_name but with a generic e-mail address
 
        # In case app_email_from is in "Some Name <e-mail>" format, we first
 
        # extract the e-mail address.
 
        envelope_addr = author_email(app_email_from)
 
        headers['From'] = '"%s" <%s>' % (
 
            email.utils.quote('%s (no-reply)' % from_name),
 
            envelope_addr)
 

	
 
    smtp_server = email_config.get('smtp_server')
 
    smtp_port = email_config.get('smtp_port')
 
    smtp_use_tls = asbool(email_config.get('smtp_use_tls'))
 
    smtp_use_ssl = asbool(email_config.get('smtp_use_ssl'))
0 comments (0 inline, 0 general)