kallithea Changeset - 911669ac48a5

Changeset - 911669ac48a5

Parent rev.

Child rev.

[Not reviewed]

default

0 4 0

Mads Kiilerich (mads) - 5 years ago 2020-11-12 18:56:53
mads@kiilerich.com

Grafted from: 4802bd37c2e9

vcs: drop repo contact ... and the odd uses of it

A half-baked and essentially unused hgweb feature.

The reference in get_commits_stats doesn't seem reachable in any relevant use
cases.

4 files changed with 0 insertions and 19 deletions:

kallithea/lib/indexers/daemon.py

kallithea/lib/vcs/backends/git/repository.py

kallithea/lib/vcs/backends/hg/repository.py

kallithea/model/async_tasks.py

0 comments (0 inline, 0 general)

kallithea/lib/indexers/daemon.py

➞

Show inline comments

@@ @@ -6,424 +6,422 @@ @@
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.indexers.daemon
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 A daemon will read from task table and run tasks
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Jan 26, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import logging
 import os
 import sys
 import traceback
 from os.path import dirname
 from shutil import rmtree
 from time import mktime
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.qparser import QueryParser
 from kallithea.lib.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
 from kallithea.lib.indexers import CHGSET_IDX_NAME, CHGSETS_SCHEMA, IDX_NAME, SCHEMA
 from kallithea.lib.utils2 import safe_str
 from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, ChangesetError, NodeDoesNotExistError, RepositoryError
 from kallithea.model import db
 from kallithea.model.scm import ScmModel
 # Add location of top level folder to sys.path
 # (the four dirname() calls strip the file name and three directory levels
 # from the resolved path of this module; the result is appended, so entries
 # already on sys.path keep precedence)
 project_path = dirname(dirname(dirname(dirname(os.path.realpath(__file__)))))
 sys.path.append(project_path)
 # module level logger, registered under the fixed name 'whoosh_indexer'
 log = logging.getLogger('whoosh_indexer')
 class WhooshIndexingDaemon(object):
     """
     Daemon for atomic indexing jobs
     """
     def __init__(self, indexname=IDX_NAME, index_location=None,
                  repo_location=None, repo_list=None,
                  repo_update_list=None):
         """
         Scan the repositories and decide whether a full index build is needed.
         :param indexname: name of the file content index
         :param index_location: directory holding the indexes (required)
         :param repo_location: directory scanned for repositories (required)
         :param repo_list: optional names of repositories; when given, only
           these are kept in ``self.repo_paths``
         :param repo_update_list: optional names of repositories; when given,
           ``self.repo_paths`` is narrowed further to these names and they
           are remembered in ``self.filtered_repo_update_paths``
         :raises Exception: if ``index_location`` or ``repo_location`` is not
           provided
         """
         self.indexname = indexname
         self.index_location = index_location
         if not index_location:
             raise Exception('You have to provide index location')
         self.repo_location = repo_location
         if not repo_location:
             raise Exception('You have to provide repositories location')
         self.repo_paths = ScmModel().repo_scan(self.repo_location)
         # filter repo list
         if repo_list:
             repo_list = set(repo_list)
             self.filtered_repo_paths = {
                 repo_name: repo
                 for repo_name, repo in self.repo_paths.items()
                 if repo_name in repo_list
             }
             self.repo_paths = self.filtered_repo_paths
         # filter update repo list; the attribute always exists (empty when no
         # update list was given) because update_file_index reads it
         self.filtered_repo_update_paths = {}
         if repo_update_list:
             # same set based membership test as used for repo_list above
             repo_update_list = set(repo_update_list)
             self.filtered_repo_update_paths = {
                 repo_name: repo
                 for repo_name, repo in self.repo_paths.items()
                 if repo_name in repo_update_list
             }
             self.repo_paths = self.filtered_repo_update_paths
         # a full build is required unless the directory and both indexes exist
         self.initial = True
         if not os.path.isdir(self.index_location):
             os.makedirs(self.index_location)
             log.info('Cannot run incremental index since it does not '
                      'yet exist - running full build')
         elif not exists_in(self.index_location, IDX_NAME):
             log.info('Running full index build, as the file content '
                      'index does not exist')
         elif not exists_in(self.index_location, CHGSET_IDX_NAME):
             log.info('Running full index build, as the changeset '
                      'index does not exist')
         else:
             self.initial = False
     def _get_index_revision(self, repo):
         """
         Return the revision to index for ``repo``: the revision part of the
         database repository's ``landing_rev``, or 'tip' when no database
         repository is found under ``repo.name``.
         """
         repo_row = db.Repository.get_by_repo_name(repo.name)
         if not repo_row:
             return 'tip'
         _kind, rev = repo_row.landing_rev
         return rev
     def _get_index_changeset(self, repo, index_rev=None):
         if not index_rev:
             index_rev = self._get_index_revision(repo)
         cs = repo.get_changeset(index_rev)
         return cs
     def get_paths(self, repo):
         """
         Walk the index changeset of ``repo`` from '/' and return the set of
         all file paths found, each joined onto ``repo.path``.
         A RepositoryError is logged at debug level and whatever was collected
         up to that point is returned.
         """
         collected = set()
         try:
             changeset = self._get_index_changeset(repo)
             for _top, _subdirs, file_nodes in changeset.walk('/'):
                 collected.update(os.path.join(repo.path, node.path)
                                  for node in file_nodes)
         except RepositoryError:
             log.debug(traceback.format_exc())
         return collected
     def get_node(self, repo, path, index_rev=None):
         """
         Look up the node for a full path in the index changeset.
         :param repo: scm repo instance
         :param path: full path; must start with ``repo.path`` followed by a
           path separator
         :param index_rev: revision handed on to ``_get_index_changeset``
         :return: whatever the changeset's ``get_node`` returns for the part
           of ``path`` after the repository root
         """
         # FIXME: paths should be normalized ... or even better: don't include repo.path
         root = repo.path
         assert path.startswith(root)
         assert path[len(root)] in (os.path.sep, os.path.altsep)
         relative_path = path[len(root) + 1:]
         changeset = self._get_index_changeset(repo, index_rev=index_rev)
         return changeset.get_node(relative_path)
     def is_indexable_node(self, node):
         """
         Tell whether the content of ``node`` should be indexed: its extension
         or its name must be listed in INDEX_EXTENSIONS / INDEX_FILENAMES and
         it must not be binary.
         """
         listed = node.extension in INDEX_EXTENSIONS or node.name in INDEX_FILENAMES
         # is_binary is only consulted for listed nodes
         return listed and not node.is_binary
     def get_node_mtime(self, node):
         """
         Return the date of the node's last changeset converted with
         ``mktime`` from its time tuple.
         """
         changed_at = node.last_changeset.date
         return mktime(changed_at.timetuple())
     def add_doc(self, writer, path, repo, repo_name, index_rev=None):
         """
         Add one file document to ``writer``; the node for ``path`` is fetched
         from the instance of vcs backend given as ``repo``.
         :param writer: index writer; ``add_document`` is called on it
         :param path: full path of the file, including the repository root
         :param repo: vcs repository instance the node is read from
         :param repo_name: stored as ``repository`` and ``repository_rawname``
         :param index_rev: revision handed on to ``get_node``
         :return: tuple ``(indexed, indexed_w_content)``; ``(0, 0)`` when the
           node cannot be found. ``indexed`` is 1 for a node that is not
           indexable, ``indexed_w_content`` is 1 for an indexable node whose
           content holds no NUL byte.
         """
         try:
             node = self.get_node(repo, path, index_rev)
         except (ChangesetError, NodeDoesNotExistError):
             log.debug("    >> %s - not found in %s %s", path, repo, index_rev)
             return 0, 0
         indexed = indexed_w_content = 0
         if self.is_indexable_node(node):
             bytes_content = node.content
             # content with a NUL byte is stored with empty text and is
             # counted in neither of the two counters
             if b'\0' in bytes_content:
                 log.warning('    >> %s - no text content', path)
                 u_content = ''
             else:
                 log.debug('    >> %s', path)
                 u_content = safe_str(bytes_content)
                 indexed_w_content += 1
         else:
             log.debug('    >> %s - not indexable', path)
             # just index file name without its content
             u_content = ''
             indexed += 1
         # the document is added in all three cases above
         writer.add_document(
             fileid=path,
             owner=repo.contact,
             repository_rawname=repo_name,
             repository=repo_name,
             path=path,
             content=u_content,
             modtime=self.get_node_mtime(node),
             extension=node.extension
+        )
         return indexed, indexed_w_content
     def index_changesets(self, writer, repo_name, repo, start_rev=None):
         """
         Add all changeset in the vcs repo starting at start_rev
         to the index writer
         :param writer: the whoosh index writer to add to
         :param repo_name: name of the repository from whence the
           changeset originates including the repository group
         :param repo: the vcs repository instance to index changesets for,
           the presumption is the repo has changesets to index
         :param start_rev=None: the full sha id to start indexing from
           if start_rev is None then index from the first changeset in
           the repo
         :return: the number of changesets passed to ``writer.add_document``
         """
         if start_rev is None:
             start_rev = repo[0].raw_id
         log.debug('Indexing changesets in %s, starting at rev %s',
                   repo_name, start_rev)
         indexed = 0
         cs_iter = repo.get_changesets(start=start_rev)
         # total is only used in the progress message below
         total = len(cs_iter)
         for cs in cs_iter:
             indexed += 1
             log.debug('    >> %s %s/%s', cs, indexed, total)
             # the path lists are stored space separated and lower cased; in
             # the ``parents`` generator the inner ``cs`` is each parent, it
             # does not rebind the loop variable
             writer.add_document(
                 raw_id=cs.raw_id,
                 owner=repo.contact,
                 date=cs._timestamp,
                 repository_rawname=repo_name,
                 repository=repo_name,
                 author=cs.author,
                 message=cs.message,
                 last=cs.last,
                 added=' '.join(node.path for node in cs.added).lower(),
                 removed=' '.join(node.path for node in cs.removed).lower(),
                 changed=' '.join(node.path for node in cs.changed).lower(),
                 parents=' '.join(cs.raw_id for cs in cs.parents),
+            )
         return indexed
     def index_files(self, file_idx_writer, repo_name, repo):
         """
         Index files for given repo_name
         :param file_idx_writer: the whoosh index writer to add to
         :param repo_name: name of the repository we're indexing
         :param repo: instance of vcs repo
         :return: tuple ``(i_cnt, iwc_cnt)``, the two counters returned by
           ``add_doc`` summed over every path from ``get_paths``
         """
         # resolve the revision once and use the same value for the log
         # message and for every add_doc call
         index_rev = self._get_index_revision(repo)
         log.debug('Building file index for %s @revision:%s', repo_name,
                   index_rev)
         i_cnt = iwc_cnt = 0
         for idx_path in self.get_paths(repo):
             i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name, index_rev)
             i_cnt += i
             iwc_cnt += iwc
         log.debug('added %s files %s with content for repo %s',
                   i_cnt + iwc_cnt, iwc_cnt, repo.path)
         return i_cnt, iwc_cnt
     def update_changeset_index(self):
         """
         Incrementally update the changeset index for every repository in
         ``self.repo_paths``: changesets after the one stored with ``last:t``
         are added, and the writer is committed only if something was written.
         """
         idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
         with idx.searcher() as searcher:
             writer = idx.writer()
             writer_is_dirty = False
             try:
                 indexed_total = 0
                 repo_name = None
                 for repo_name, repo in sorted(self.repo_paths.items()):
                     log.debug('Updating changeset index for repo %s', repo_name)
                     # skip indexing if there aren't any revs in the repo
                     num_of_revs = len(repo)
                     if num_of_revs < 1:
                         continue
                     qp = QueryParser('repository', schema=CHGSETS_SCHEMA)
                     # NOTE(review): repo_name is interpolated into the query
                     # text as is - confirm names with query syntax characters
                     # are parsed as intended
                     q = qp.parse("last:t AND %s" % repo_name)
                     results = searcher.search(q)
                     # default to scanning the entire repo
                     last_rev = 0
                     start_id = None
                     if len(results) > 0:
                         # assuming that there is only one result, if not this
                         # may require a full re-index.
                         start_id = results[0]['raw_id']
                         try:
                             last_rev = repo.get_changeset(revision=start_id).revision
                         except ChangesetDoesNotExistError:
                             log.error('previous last revision %s not found - indexing from scratch', start_id)
                             start_id = None
                     # there are new changesets to index or a new repo to index
                     if last_rev == 0 or num_of_revs > last_rev + 1:
                         # delete the docs in the index for the previous
                         # last changeset(s)
                         for hit in results:
                             q = qp.parse("last:t AND %s AND raw_id:%s" %
                                             (repo_name, hit['raw_id']))
                             writer.delete_by_query(q)
                         # index from the previous last changeset + all new ones
                         indexed_total += self.index_changesets(writer,
                                                 repo_name, repo, start_id)
                         writer_is_dirty = True
                 # runs once after the loop: the total covers all repositories
                 # and repo_name is the last one iterated (None if there were none)
                 log.debug('indexed %s changesets for repo %s',
                           indexed_total, repo_name
+                )
             finally:
                 if writer_is_dirty:
                     log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
                     writer.commit(merge=True)
                     log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
                 else:
                     # NOTE(review): update_file_index calls writer.cancel() on
                     # its matching branch; here the writer is neither committed
                     # nor cancelled - confirm whether that is intended
                     log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
     def update_file_index(self):
         """
         Incrementally update the file index: stored documents of the
         repositories in ``self.filtered_repo_update_paths`` are checked for
         changed or vanished files, then every path that is new or marked for
         re-indexing is added again. The writer is committed if anything was
         written and cancelled otherwise.
         """
         log.debug('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
                   'AND REPOS %s', INDEX_EXTENSIONS, ' and '.join(self.repo_paths))
         idx = open_dir(self.index_location, indexname=self.indexname)
         # The set of all paths in the index
         indexed_paths = set()
         # The set of all paths we need to re-index
         to_index = set()
         writer = idx.writer()
         writer_is_dirty = False
         try:
             with idx.reader() as reader:
                 # Loop over the stored fields in the index
                 for fields in reader.all_stored_fields():
                     indexed_path = fields['path']
                     indexed_repo_path = fields['repository']
                     indexed_paths.add(indexed_path)
                     # documents of other repositories are only recorded as
                     # already indexed; they get no change/deletion check
                     if indexed_repo_path not in self.filtered_repo_update_paths:
                         continue
                     repo = self.repo_paths[indexed_repo_path]
                     try:
                         node = self.get_node(repo, indexed_path)
                         # Check if this file was changed since it was indexed
                         indexed_time = fields['modtime']
                         mtime = self.get_node_mtime(node)
                         if mtime > indexed_time:
                             # The file has changed, delete it and add it to
                             # the list of files to reindex
                             log.debug(
                                 'adding to reindex list %s mtime: %s vs %s',
                                     indexed_path, mtime, indexed_time
+                            )
                             writer.delete_by_term('fileid', indexed_path)
                             writer_is_dirty = True
                             to_index.add(indexed_path)
                     except (ChangesetError, NodeDoesNotExistError):
                         # This file was deleted since it was indexed
                         log.debug('removing from index %s', indexed_path)
                         writer.delete_by_term('path', indexed_path)
                         writer_is_dirty = True
             # Loop over the files in the filesystem
             # Assume we have a function that gathers the filenames of the
             # documents to be indexed
             ri_cnt_total = 0  # indexed
             riwc_cnt_total = 0  # indexed with content
             for repo_name, repo in sorted(self.repo_paths.items()):
                 log.debug('Updating file index for repo %s', repo_name)
                 # skip indexing if there aren't any revisions
                 if len(repo) < 1:
                     continue
                 ri_cnt = 0   # indexed
                 riwc_cnt = 0  # indexed with content
                 for path in self.get_paths(repo):
                     if path in to_index or path not in indexed_paths:
                         # This is either a file that's changed, or a new file
                         # that wasn't indexed before. So index it!
                         i, iwc = self.add_doc(writer, path, repo, repo_name)
                         writer_is_dirty = True
                         ri_cnt += i
                         # NOTE(review): the total grows by one per add_doc
                         # call while ri_cnt grows by the returned counter -
                         # confirm the difference is intended
                         ri_cnt_total += 1
                         riwc_cnt += iwc
                         riwc_cnt_total += iwc
                 log.debug('added %s files %s with content for repo %s',
                              ri_cnt + riwc_cnt, riwc_cnt, repo.path
+                )
             log.debug('indexed %s files in total and %s with content',
                         ri_cnt_total, riwc_cnt_total
+            )
         finally:
             if writer_is_dirty:
                 log.debug('>> COMMITING CHANGES TO FILE INDEX <<')
                 writer.commit(merge=True)
                 log.debug('>>> FINISHED REBUILDING FILE INDEX <<<')
             else:
                 log.debug('>> NOTHING TO COMMIT TO FILE INDEX <<')
                 writer.cancel()
     def build_indexes(self):
         if os.path.exists(self.index_location):
             log.debug('removing previous index')
             rmtree(self.index_location)
         if not os.path.exists(self.index_location):
             os.mkdir(self.index_location)
         chgset_idx = create_in(self.index_location, CHGSETS_SCHEMA,
                                indexname=CHGSET_IDX_NAME)
         chgset_idx_writer = chgset_idx.writer()
         file_idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME)

kallithea/lib/vcs/backends/git/repository.py

➞

Show inline comments

@@ @@ -199,389 +199,384 @@ class GitRepository(BaseRepository): @@
             parsed_url.port  # trigger netloc parsing which might raise ValueError
         except ValueError:
             raise urllib.error.URLError("Error parsing URL: %r" % url)
         # check first if it's not an local url
         if os.path.isabs(url) and os.path.isdir(url):
             return
         if parsed_url.scheme == 'git':
             # Mitigate problems elsewhere with incorrect handling of encoded paths.
             # Don't trust urllib.parse.unquote but be prepared for more flexible implementations elsewhere.
             # Space is the only allowed whitespace character - directly or % encoded. No other % or \ is allowed.
             for c in parsed_url.path.replace('%20', ' '):
                 if c in '%\\':
                     raise urllib.error.URLError("Invalid escape character in path: '%s'" % c)
                 if c.isspace() and c != ' ':
                     raise urllib.error.URLError("Invalid whitespace character in path: %r" % c)
             return
         if parsed_url.scheme not in ['http', 'https']:
             raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
         url_obj = mercurial.util.url(safe_bytes(url))
         test_uri, handlers = get_urllib_request_handlers(url_obj)
         if not test_uri.endswith(b'info/refs'):
             test_uri = test_uri.rstrip(b'/') + b'/info/refs'
         url_obj.passwd = b'*****'
         cleaned_uri = str(url_obj)
         o = urllib.request.build_opener(*handlers)
         o.addheaders = [('User-Agent', 'git/1.7.8.0')]  # fake some git
         req = urllib.request.Request(
             "%s?%s" % (
                 safe_str(test_uri),
                 urllib.parse.urlencode({"service": 'git-upload-pack'})
             ))
         try:
             resp = o.open(req)
             if resp.code != 200:
                 raise Exception('Return Code is not 200')
         except Exception as e:
             # means it cannot be cloned
             raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
         # now detect if it's proper git repo
         gitdata = resp.read()
         if b'service=git-upload-pack' not in gitdata:
             raise urllib.error.URLError(
                 "url [%s] does not look like an git" % cleaned_uri)
     def _get_repo(self, create, src_url=None, update_after_clone=False,
                   bare=False):
         """
         Open the repository at ``self.path``, creating or cloning it first
         when ``create`` is set.
         :param create: create the repository; the location must not exist yet
         :param src_url: url to clone from; only valid together with ``create``
         :param update_after_clone: handed on to ``self.clone``
         :param bare: create a bare repository (also handed on to ``self.clone``)
         :raises RepositoryError: for an already existing location, for
           ``src_url`` without ``create`` and for NotGitRepository / OSError
           raised while opening or creating
         """
         if create and os.path.exists(self.path):
             raise RepositoryError("Location already exist")
         if src_url and not create:
             raise RepositoryError("Create should be set to True if src_url is "
                                   "given (clone operation creates repository)")
         try:
             if not create:
                 return Repo(self.path)
             if src_url:
                 GitRepository._check_url(src_url)
                 self.clone(src_url, update_after_clone, bare)
                 return Repo(self.path)
             os.makedirs(self.path)
             initialise = Repo.init_bare if bare else Repo.init
             return initialise(self.path)
         except (NotGitRepository, OSError) as err:
             raise RepositoryError(err)
     def _get_all_revisions(self):
         # we must check if this repo is not empty, since later command
         # fails if it is. And it's cheaper to ask than throw the subprocess
         # errors
         try:
             self._repo.head()
         except KeyError:
             return []
         rev_filter = settings.GIT_REV_FILTER
         cmd = ['rev-list', rev_filter, '--reverse', '--date-order']
         try:
             so = self.run_git_command(cmd)
         except RepositoryError:
             # Can be raised for empty repositories
             return []
         return so.splitlines()
     def _get_all_revisions2(self):
         # alternate implementation using dulwich
         includes = [ascii_str(sha) for key, (sha, type_) in self._parsed_refs.items()
                     if type_ != b'T']
         return [c.commit.id for c in self._repo.get_walker(include=includes)]
     def _get_revision(self, revision):
         """
         Given any revision identifier, returns a 40 char string with revision hash.
         :param revision: ``None``, ``''``, ``'tip'``, ``'HEAD'``, ``'head'`` or
           ``-1`` for the last entry of ``self.revisions``; an int index into
           ``self.revisions``; or a str holding a numeric index, a ref name
           or a revision hash
         :raises EmptyRepositoryError: if ``self._empty`` is set
         :raises ChangesetDoesNotExistError: if the revision cannot be resolved
         """
         if self._empty:
             raise EmptyRepositoryError("There are no changesets yet")
         if revision in (None, '', 'tip', 'HEAD', 'head', -1):
             revision = -1
         if isinstance(revision, int):
             try:
                 return self.revisions[revision]
             except IndexError:
                 msg = "Revision %r does not exist for %s" % (revision, self.name)
                 raise ChangesetDoesNotExistError(msg)
         if isinstance(revision, str):
             # all-digit strings shorter than 12 chars, or made of zeros only,
             # are used as an index into self.revisions
             if revision.isdigit() and (len(revision) < 12 or len(revision) == revision.count('0')):
                 try:
                     return self.revisions[int(revision)]
                 except IndexError:
                     # NOTE(review): this message formats ``self`` while the
                     # other two use ``self.name`` - confirm which is intended
                     msg = "Revision %r does not exist for %s" % (revision, self)
                     raise ChangesetDoesNotExistError(msg)
             # get by branch/tag name
             _ref_revision = self._parsed_refs.get(safe_bytes(revision))
             if _ref_revision:  # and _ref_revision[1] in [b'H', b'RH', b'T']:
                 return ascii_str(_ref_revision[0])
             if revision in self.revisions:
                 return revision
             # maybe it's a tag ? we don't have them in self.revisions
             if revision in self.tags.values():
                 return revision
             # it matches SHA_PATTERN but was found nowhere above
             if SHA_PATTERN.match(revision):
                 msg = "Revision %r does not exist for %s" % (revision, self.name)
                 raise ChangesetDoesNotExistError(msg)
         # reached for values that are neither int nor str and for strings
         # that matched none of the lookups above
         raise ChangesetDoesNotExistError("Given revision %r not recognized" % revision)
     def get_ref_revision(self, ref_type, ref_name):
         """
         Returns the revision that ``ref_name`` resolves to, as returned by
         ``_get_revision`` (a revision hash string, not a ``GitChangeset``).
         :param ref_type: not used by this implementation
         :param ref_name: identifier handed on to ``_get_revision``
         """
         return self._get_revision(ref_name)
     def _get_archives(self, archive_name='tip'):
         for i in [('zip', '.zip'), ('gz', '.tar.gz'), ('bz2', '.tar.bz2')]:
             yield {"type": i[0], "extension": i[1], "node": archive_name}
     def _get_url(self, url):
         """
         Returns normalized url. If schema is not given, would fall to
         filesystem (``file:///``) schema.
         """
         if url != 'default' and '://' not in url:
             url = ':///'.join(('file', url))
         return url
     @LazyProperty
     def name(self):
         """Last component of ``self.path``."""
         _parent, base = os.path.split(self.path)
         return base
     @LazyProperty
     def last_change(self):
         """
         Last change made on this repository: ``_get_mtime()`` converted by
         ``date_fromtimestamp`` together with the second item of ``makedate()``
         """
         mtime = self._get_mtime()
         makedate_item = makedate()[1]
         return date_fromtimestamp(mtime, makedate_item)
     def _get_mtime(self):
         try:
             return time.mktime(self.get_changeset().date.timetuple())
         except RepositoryError:
             idx_loc = '' if self.bare else '.git'
             # fallback to filesystem
             in_path = os.path.join(self.path, idx_loc, "index")
             he_path = os.path.join(self.path, idx_loc, "HEAD")
             if os.path.exists(in_path):
                 return os.stat(in_path).st_mtime
             else:
                 return os.stat(he_path).st_mtime
     @LazyProperty
     def description(self):
         """Repository description as str; b'unknown' is used when it is empty."""
         raw_description = self._repo.get_description() or b'unknown'
         return safe_str(raw_description)
     @LazyProperty
     def contact(self):
         """Repository contact; this implementation always returns 'Unknown'."""
         return 'Unknown'
     @property
     def branches(self):
         """
         Ordered mapping of branch name to revision hash, built from the
         parsed refs of type ``b'H'`` and sorted by name. An empty dict is
         returned when there are no revisions.
         """
         if not self.revisions:
             return {}
         heads = []
         for ref_name, (sha, kind) in self._parsed_refs.items():
             if kind == b'H':
                 heads.append((safe_str(ref_name), ascii_str(sha)))
         heads.sort(key=lambda pair: pair[0])
         return OrderedDict(heads)
     @LazyProperty
     def closed_branches(self):
         """Closed branches; this implementation always returns an empty dict."""
         no_closed_branches = {}
         return no_closed_branches
     @LazyProperty
     def tags(self):
         """Tags of this repository, as built by ``_get_tags()``."""
         tag_map = self._get_tags()
         return tag_map
     def _get_tags(self):
         if not self.revisions:
             return {}
         _tags = [(safe_str(key), ascii_str(sha))
                  for key, (sha, type_) in self._parsed_refs.items() if type_ == b'T']
         return OrderedDict(sorted(_tags, key=(lambda ctx: ctx[0]), reverse=True))
     def tag(self, name, user, revision=None, message=None, date=None,
             **kwargs):
         """
         Creates a tag ref for the given ``revision`` and returns the tagged
         changeset.
         :param name: name for new tag
         :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
         :param revision: changeset id for which new tag would be created
         :param message: message of the tag's commit
         :param date: date of tag's commit
         :raises TagAlreadyExistError: if tag with same name already exists
         """
         if name in self.tags:
             raise TagAlreadyExistError("Tag %s already exists" % name)
         target = self.get_changeset(revision)
         # the message is computed but not written anywhere in this method
         message = message or "Added tag %s for commit %s" % (name, target.raw_id)
         ref_name = b"refs/tags/%s" % safe_bytes(name)
         self._repo.refs[ref_name] = target._commit.id
         # refresh the ref and tag mappings so they include the new tag
         self._parsed_refs = self._get_parsed_refs()
         self.tags = self._get_tags()
         return target
     def remove_tag(self, name, user, message=None, date=None):
         """
         Removes tag with the given ``name`` by deleting its ref file.
         :param name: name of the tag to be removed
         :param user: full username, i.e.: "Joe Doe <joe.doe@example.com>"
         :param message: message of the tag's removal commit
         :param date: date of tag's removal commit
         :raises TagDoesNotExistError: if tag with given name does not exists
         :raises RepositoryError: if removing the ref file fails with OSError
         """
         if name not in self.tags:
             raise TagDoesNotExistError("Tag %s does not exist" % name)
         # self._repo.refs is a DiskRefsContainer, and .path gives the full absolute path of '.git'
         refs_root = safe_str(self._repo.refs.path)
         tagpath = os.path.join(refs_root, 'refs', 'tags', name)
         try:
             os.remove(tagpath)
             # refresh the ref and tag mappings now that the file is gone
             self._parsed_refs = self._get_parsed_refs()
             self.tags = self._get_tags()
         except OSError as e:
             raise RepositoryError(e.strerror)
     @LazyProperty
     def bookmarks(self):
         """
         Bookmarks for this repository; this implementation always returns
         an empty dict
         """
         no_bookmarks = {}
         return no_bookmarks
     @LazyProperty
     def _parsed_refs(self):
         """Ref mapping as built by ``_get_parsed_refs()``."""
         parsed = self._get_parsed_refs()
         return parsed
     def _get_parsed_refs(self):
         """Return refs as a dict, like:
         { b'v0.2.0': [b'599ba911aa24d2981225f3966eb659dfae9e9f30', b'T'] }
         """
         _repo = self._repo
         refs = _repo.get_refs()
         keys = [(b'refs/heads/', b'H'),
                 (b'refs/remotes/origin/', b'RH'),
                 (b'refs/tags/', b'T')]
         _refs = {}
         for ref, sha in refs.items():
             for k, type_ in keys:
                 if ref.startswith(k):
                     _key = ref[len(k):]
                     if type_ == b'T':
                         obj = _repo.get_object(sha)
                         if isinstance(obj, Tag):
                             sha = _repo.get_object(sha).object[1]
                     _refs[_key] = [sha, type_]
                     break
         return _refs
     def _heads(self, reverse=False):
         refs = self._repo.get_refs()
         heads = {}
         for key, val in refs.items():
             for ref_key in [b'refs/heads/', b'refs/remotes/origin/']:
                 if key.startswith(ref_key):
                     n = key[len(ref_key):]
                     if n not in [b'HEAD']:
                         heads[n] = val
         return heads if reverse else dict((y, x) for x, y in heads.items())
     def get_changeset(self, revision=None):
         """
         Returns ``GitChangeset`` object for the given revision, resolved
         through ``_get_revision``. A ``GitChangeset`` passed in as
         ``revision`` is returned unchanged.
         """
         if isinstance(revision, changeset.GitChangeset):
             return revision
         raw_id = self._get_revision(revision)
         return changeset.GitChangeset(repository=self, revision=raw_id)
     def get_changesets(self, start=None, end=None, start_date=None,
            end_date=None, branch_name=None, reverse=False, max_revisions=None):
         """
         Returns iterator of ``GitChangeset`` objects from start to end (both
         are inclusive), in ascending date order (unless ``reverse`` is set).
         :param start: changeset ID, as str; first returned changeset
         :param end: changeset ID, as str; last returned changeset
         :param start_date: if specified, changesets with commit date less than
           ``start_date`` would be filtered out from returned set
         :param end_date: if specified, changesets with commit date greater than
           ``end_date`` would be filtered out from returned set
         :param branch_name: if specified, changesets not reachable from given
           branch would be filtered out from returned set
         :param reverse: if ``True``, returned generator would be reversed
           (meaning that returned changesets would have descending date order)
         :raise BranchDoesNotExistError: If given ``branch_name`` does not
             exist.
         :raise ChangesetDoesNotExistError: If changeset for given ``start`` or
           ``end`` could not be found.
         """
         if branch_name and branch_name not in self.branches:
             raise BranchDoesNotExistError("Branch '%s' not found"
                                           % branch_name)
         # actually we should check now if it's not an empty repo to not spaw
         # subprocess commands
         if self._empty:
             raise EmptyRepositoryError("There are no changesets yet")
         # %H at format means (full) commit hash, initial hashes are retrieved
         # in ascending date order
         cmd = ['log', '--date-order', '--reverse', '--pretty=format:%H']
         if max_revisions:
             cmd += ['--max-count=%s' % max_revisions]
         if start_date:
             cmd += ['--since', start_date.strftime('%m/%d/%y %H:%M:%S')]
         if end_date:
             cmd += ['--until', end_date.strftime('%m/%d/%y %H:%M:%S')]
         if branch_name:
             cmd.append(branch_name)
         else:
             cmd.append(settings.GIT_REV_FILTER)
         revs = self.run_git_command(cmd).splitlines()
         start_pos = 0
         end_pos = len(revs)
         if start:
             _start = self._get_revision(start)
             try:
                 start_pos = revs.index(_start)
             except ValueError:
                 pass
         if end is not None:
             _end = self._get_revision(end)
             try:
                 end_pos = revs.index(_end)
             except ValueError:
                 pass
         if None not in [start, end] and start_pos > end_pos:
             raise RepositoryError('start cannot be after end')
         if end_pos is not None:
             end_pos += 1

kallithea/lib/vcs/backends/hg/repository.py

➞

Show inline comments

@@ @@ -224,389 +224,384 @@ class MercurialRepository(BaseRepository @@
         """
         return self._get_bookmarks()
     def _get_bookmarks(self):
         if self._empty:
             return {}
         return OrderedDict(sorted(
             ((safe_str(n), ascii_str(mercurial.node.hex(h))) for n, h in self._repo._bookmarks.items()),
             reverse=True,
             key=lambda x: x[0],  # sort by name
         ))
     def _get_all_revisions(self):
         """
         Return the hex ids of all revisions of the ``visible`` repository
         view, in the order yielded by its ``changelog.revs()``.
         """
         visible_repo = self._repo.filtered(b'visible')
         return [ascii_str(self._repo[rev].hex()) for rev in visible_repo.changelog.revs()]
     def get_diff(self, rev1, rev2, path='', ignore_whitespace=False,
                   context=3):
         """
         Returns (git like) *diff*, as bytes, of the changes introduced by
         ``rev2`` since ``rev1``.

         :param rev1: Revision (or object with a ``raw_id``) the diff starts
           from. May be ``self.EMPTY_CHANGESET``, in which case the diff
           covers everything from the empty repository state up to ``rev2``.
         :param rev2: Revision (or object with a ``raw_id``) the diff goes to.
         :param path: If given, only this exact file is included in the diff.
         :param ignore_whitespace: If ``True``, whitespace changes are left
           out. Defaults to ``False``.
         :param context: Number of context lines around changed lines.
           Defaults to ``3``. Negative values are treated as ``0``.
         """
         # A negative context makes no sense and would only cause errors.
         context = max(context, 0)

         # Accept changeset-like objects as well as plain revision ids.
         rev1 = getattr(rev1, 'raw_id', rev1)
         rev2 = getattr(rev2, 'raw_id', rev2)

         # Make sure both revisions are present in the repository (may raise
         # ChangesetDoesNotExistError)
         if rev1 != self.EMPTY_CHANGESET:
             self.get_changeset(rev1)
         self.get_changeset(rev2)

         file_filter = mercurial.match.exact([safe_bytes(path)]) if path else None
         diff_opts = mercurial.mdiff.diffopts(
             git=True,
             showfunc=True,
             ignorews=ignore_whitespace,
             context=context,
         )
         chunks = mercurial.patch.diff(self._repo, rev1, rev2, match=file_filter, opts=diff_opts)
         return b''.join(chunks)
     @staticmethod
     def _check_url(url, repoui=None):
         r"""
         Raise URLError if url doesn't seem like a valid safe Hg URL. We
         only allow http, https, ssh, and hg-git URLs.

         Local URLs (an existing absolute directory path, or the ``file``
         scheme) are accepted without any further check.
         For http, https and git URLs, make a connection and probe to see if it is valid.
         Failures of that probe (including a return code other than 200) are
         re-raised as ``urllib.error.URLError``.
         For ssh URLs, ``tip`` is looked up through an ssh peer; exceptions
         from that lookup are not wrapped here.

         :param url: the URL (str) to check
         :param repoui: optional Mercurial ui object for the peer lookups; a
             fresh ``mercurial.ui.ui()`` is used if not given

         >>> MercurialRepository._check_url('file:///repo')
         >>> MercurialRepository._check_url('http://example.com:65537/repo')
         Traceback (most recent call last):
         ...
         urllib.error.URLError: <urlopen error Error parsing URL: 'http://example.com:65537/repo'>
         >>> MercurialRepository._check_url('foo')
         Traceback (most recent call last):
         ...
         urllib.error.URLError: <urlopen error Unsupported protocol in URL 'foo'>
         >>> MercurialRepository._check_url('git+ssh://example.com/my%20fine repo')
         Traceback (most recent call last):
         ...
         urllib.error.URLError: <urlopen error Unsupported protocol in URL 'git+ssh://example.com/my%20fine repo'>
         >>> MercurialRepository._check_url('svn+http://example.com/repo')
         Traceback (most recent call last):
         ...
         urllib.error.URLError: <urlopen error Unsupported protocol in URL 'svn+http://example.com/repo'>
         """
         try:
             parsed_url = urllib.parse.urlparse(url)
             parsed_url.port  # trigger netloc parsing which might raise ValueError
         except ValueError:
             raise urllib.error.URLError("Error parsing URL: %r" % url)
         # check first if it's not an local url
         if os.path.isabs(url) and os.path.isdir(url) or parsed_url.scheme == 'file':
             # When creating repos, _get_url will use file protocol for local paths
             return
         if parsed_url.scheme not in ['http', 'https', 'ssh', 'git+http', 'git+https']:
             raise urllib.error.URLError("Unsupported protocol in URL %r" % url)
         # from here on the URL is handed to Mercurial as bytes
         url = safe_bytes(url)
         if parsed_url.scheme == 'ssh':
             # in case of invalid uri or authentication issues, sshpeer will
             # throw an exception.
             mercurial.sshpeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
             return
         if '+' in parsed_url.scheme:  # strip 'git+' for hg-git URLs
             url = url.split(b'+', 1)[1]
         url_obj = mercurial.util.url(url)
         # build the request handlers before the password is masked below
         test_uri, handlers = get_urllib_request_handlers(url_obj)
         # mask the password so that the URL used in error messages does not
         # expose it
         url_obj.passwd = b'*****'
         cleaned_uri = str(url_obj)
         o = urllib.request.build_opener(*handlers)
         o.addheaders = [('Content-Type', 'application/mercurial-0.1'),
                         ('Accept', 'application/mercurial-0.1')]
         # probe with a 'between' command for the pair of all-zero ids
         req = urllib.request.Request(
             "%s?%s" % (
                 safe_str(test_uri),
                 urllib.parse.urlencode({
                     'cmd': 'between',
                     'pairs': "%s-%s" % ('0' * 40, '0' * 40),
                 })
             ))
         try:
             resp = o.open(req)
             if resp.code != 200:
                 raise Exception('Return Code is not 200')
         except Exception as e:
             # means it cannot be cloned
             raise urllib.error.URLError("[%s] org_exc: %s" % (cleaned_uri, e))
         if parsed_url.scheme in ['http', 'https']:  # skip git+http://... etc
             # now check if it's a proper hg repo
             try:
                 mercurial.httppeer.instance(repoui or mercurial.ui.ui(), url, False).lookup(b'tip')
             except Exception as e:
                 raise urllib.error.URLError(
                     "url [%s] does not look like an hg repo org_exc: %s"
                     % (cleaned_uri, e))
     def _get_repo(self, create, src_url=None, update_after_clone=False):
         """
         Return a Mercurial localrepo object for the repository at
         ``self.path``.

         If ``src_url`` is given, the URL is checked and the repository is
         cloned from it first; the working copy is only updated when
         ``update_after_clone`` is set. Otherwise the repository is opened,
         or created when ``create`` is true.

         :raise RepositoryError: if Mercurial aborts or reports a repository
             error while cloning, creating or opening the repository.
         """
         try:
             if src_url:
                 url = self._get_url(src_url)
                 clone_opts = {} if update_after_clone else {'noupdate': True}
                 MercurialRepository._check_url(url, self.baseui)
                 mercurial.commands.clone(self.baseui, safe_bytes(url), safe_bytes(self.path), **clone_opts)
                 # The clone already created the repo - just open it below
                 create = False
             return mercurial.localrepo.instance(self.baseui, safe_bytes(self.path), create=create)
         except (mercurial.error.Abort, mercurial.error.RepoError) as err:
             if create:
                 template = "Cannot create repository at %s. Original error was %s"
             else:
                 template = "Not valid repository at %s. Original error was %s"
             raise RepositoryError(template % (self.name, err))
     @LazyProperty
     def in_memory_changeset(self):
         """
         Returns a ``MercurialInMemoryChangeset`` object created for this
         repository.
         """
         in_memory = inmemory.MercurialInMemoryChangeset(self)
         return in_memory
     @LazyProperty
     def description(self):
         """
         Returns the ``web.description`` setting of the repository as str,
         or ``'unknown'`` if it is not set.
         """
         configured = self._repo.ui.config(b'web', b'description', None, untrusted=True)
         if not configured:
             configured = b'unknown'
         return safe_str(configured)
     @LazyProperty
     def contact(self):
         """
         Returns the contact that hgweb's ``get_contact`` finds in the
         repository configuration as str, or ``'Unknown'`` if there is none.
         """
         configured = mercurial.hgweb.common.get_contact(self._repo.ui.config)
         if configured:
             return safe_str(configured)
         return safe_str(b'Unknown')
     @LazyProperty
     def last_change(self):
         """
         Returns the time of the last change made to this repository, as
         built by ``date_fromtimestamp`` from the repository mtime and the
         offset part of ``makedate()``.
         """
         mtime = self._get_mtime()
         tz_offset = makedate()[1]
         return date_fromtimestamp(mtime, tz_offset)
     def _get_mtime(self):
         """
         Returns the timestamp of the default changeset (as returned by
         ``get_changeset()``). If that fails with ``RepositoryError``, the
         filesystem mtime of ``.hg/00changelog.i`` is used, or that of
         ``.hg/store`` if the changelog file does not exist.
         """
         try:
             tip_date = self.get_changeset().date
             return time.mktime(tip_date.timetuple())
         except RepositoryError:
             # no usable changeset - use the filesystem instead
             changelog_path = os.path.join(self.path, '.hg', "00changelog.i")
             store_path = os.path.join(self.path, '.hg', "store")
             mtime_source = changelog_path if os.path.exists(changelog_path) else store_path
             return os.stat(mtime_source).st_mtime
     def _get_revision(self, revision):
         """
         Resolve a revision identifier to its full hex revision hash (str).

         ``-1`` and ``None`` resolve to ``tip``; an int is looked up as a
         revision number; anything else is resolved as a revision symbol.

         :param revision: str or int or None
         :raise EmptyRepositoryError: if the repository has no changesets
         :raise ChangesetDoesNotExistError: if the revision cannot be found
             or the identifier is ambiguous
         """
         if self._empty:
             raise EmptyRepositoryError("There are no changesets yet")

         if revision in (-1, None):
             revision = b'tip'
         elif isinstance(revision, str):
             revision = safe_bytes(revision)

         try:
             if isinstance(revision, int):
                 ctx = self._repo[revision]
             else:
                 ctx = mercurial.scmutil.revsymbol(self._repo, revision)
             return ascii_str(ctx.hex())
         except (IndexError, ValueError, mercurial.error.RepoLookupError, TypeError):
             raise ChangesetDoesNotExistError(
                 "Revision %r does not exist for %s" % (safe_str(revision), self.name))
         except LookupError:
             raise ChangesetDoesNotExistError(
                 "Ambiguous identifier `%s` for %s" % (safe_str(revision), self.name))
     def get_ref_revision(self, ref_type, ref_name):
         """
         Returns the revision hash for the given reference.

         :param ref_type: one of ``'branch'``, ``'book'``, ``'tag'`` or
             ``'rev'``
         :param ref_name: name of the reference; a ``'rev'`` consisting only
             of zeros gives ``self.EMPTY_CHANGESET``
         :raise ChangesetDoesNotExistError: if the reference is ambiguous or
             cannot be found
         """
         if ref_type == 'rev' and not ref_name.strip('0'):
             return self.EMPTY_CHANGESET

         # revset predicate to use for each kind of reference
         _revset_predicates = {
             'branch': 'branch',
             'book': 'bookmark',
             'tag': 'tag',
             'rev': 'id',
         }
         predicate = _revset_predicates[ref_type]
         # look up the exact node id; intersecting with the name itself
         # avoids expensive branch(x) iteration over whole repo
         rev_spec = "%s & " + predicate + "(%s)"
         try:
             revs = self._repo.revs(rev_spec, ref_name, ref_name)
         except LookupError:
             raise ChangesetDoesNotExistError(
                 "Ambiguous identifier %s:%s for %s" % (ref_type, ref_name, self.name))
         except mercurial.error.RepoLookupError:
             raise ChangesetDoesNotExistError(
                 "Revision %s:%s does not exist for %s" % (ref_type, ref_name, self.name))

         # TODO: just report 'not found' instead of falling back to the name?
         revision = revs.last() if revs else ref_name
         return self._get_revision(revision)
     def _get_archives(self, archive_name='tip'):
         """
         Yield a dict with ``type``, ``extension`` and ``node`` for each of
         the archive kinds zip, gz and bz2 that is enabled, either by being
         listed in ``web.allow_archive`` or through its own ``web.allow<kind>``
         setting.
         """
         allowed = self.baseui.configlist(b"web", b"allow_archive",
                                          untrusted=True)
         archive_kinds = (
             (b'zip', '.zip'),
             (b'gz', '.tar.gz'),
             (b'bz2', '.tar.bz2'),
         )
         for kind, extension in archive_kinds:
             enabled = kind in allowed or self._repo.ui.configbool(
                 b"web", b"allow" + kind, untrusted=True)
             if not enabled:
                 continue
             yield {"type": safe_str(kind), "extension": extension, "node": archive_name}
     def _get_url(self, url):
         """
         Returns normalized url. If schema is not given, fall back to
         filesystem (``file:///``) schema.
         """
         if url != 'default' and '://' not in url:
             url = "file:" + urllib.request.pathname2url(url)
         return url
     def get_changeset(self, revision=None):
         """
         Returns the ``MercurialChangeset`` object for the given ``revision``
         of this repository, after resolving it with ``_get_revision``.
         """
         raw_id = self._get_revision(revision)
         return changeset.MercurialChangeset(repository=self, revision=raw_id)
     def get_changesets(self, start=None, end=None, start_date=None,
                        end_date=None, branch_name=None, reverse=False, max_revisions=None):
         """
         Returns a ``CollectionGenerator`` over the changesets from ``start``
         to ``end`` (both are inclusive).

         :param start: None, str, int or mercurial lookup format
         :param end:  None, str, int or mercurial lookup format
         :param start_date: if given, only changesets dated after it
         :param end_date: if given, only changesets dated before it
         :param branch_name: if given, only changesets on this branch
         :param reverse: return changesets in reversed order
         :param max_revisions: if given, limit the number of revisions
         :raise RepositoryError: if ``start`` comes after ``end``
         :raise BranchDoesNotExistError: if ``branch_name`` is not a branch
             of this repository
         """
         start_raw_id = self._get_revision(start)
         start_pos = self.revisions.index(start_raw_id) if start is not None else None
         end_raw_id = self._get_revision(end)
         end_pos = self.revisions.index(end_raw_id) if end is not None else None

         if start_pos is not None and end_pos is not None and start_pos > end_pos:
             raise RepositoryError(
                 "Start revision '%s' cannot be after end revision '%s'" % (start, end))

         if branch_name and branch_name not in self.allbranches:
             raise BranchDoesNotExistError(
                 "Branch %r not found in %s" % (branch_name, self.name))

         # the end is inclusive, slices are not
         if end_pos is not None:
             end_pos += 1

         # collect the revset conditions to filter on
         conditions = []
         if branch_name:
             conditions.append(b'branch("%s")' % safe_bytes(branch_name))
         if start_date:
             conditions.append(b'date(">%s")' % safe_bytes(str(start_date)))
         if end_date:
             conditions.append(b'date("<%s")' % safe_bytes(str(end_date)))

         if conditions or max_revisions:
             revspec = b' and '.join(conditions) if conditions else b'all()'
             if max_revisions:
                 revspec = b'limit(%s, %d)' % (revspec, max_revisions)
             revisions = mercurial.scmutil.revrange(self._repo, [revspec])
         else:
             revisions = self.revisions

         # this is very much a hack to turn this into a list; a better solution
         # would be to get rid of this function entirely and use revsets
         revs = list(revisions)[start_pos:end_pos]
         if reverse:
             revs.reverse()
         return CollectionGenerator(self, revs)
     def get_diff_changesets(self, org_rev, other_repo, other_rev):
         """
         Returns lists of changesets that can be merged from this repo @org_rev
         to other_repo @other_rev
         ... and the other way
         ... and the ancestors that would be used for merge
         :param org_rev: the revision we want our compare to be made
         :param other_repo: repo object, most likely the fork of org_repo. It has
             all changesets that we need to obtain
         :param other_rev: revision we want out compare to be made on other_repo
         """
         ancestors = None
         if org_rev == other_rev:
             org_changesets = []
             other_changesets = []
         else:
             # case two independent repos
             if self != other_repo:
                 hgrepo = mercurial.unionrepo.makeunionrepository(other_repo.baseui,
                                                        safe_bytes(other_repo.path),
                                                        safe_bytes(self.path))
                 # all ancestors of other_rev will be in other_repo and
                 # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot
             # no remote compare do it on the same repository
             else:
                 hgrepo = other_repo._repo
             ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
                          hgrepo.revs(b"id(%s) & ::id(%s)", ascii_bytes(other_rev), ascii_bytes(org_rev))]
             if ancestors:
                 log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev)
             else:
                 log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev)
                 ancestors = [ascii_str(hgrepo[ancestor].hex()) for ancestor in
                              hgrepo.revs(b"heads(::id(%s) & ::id(%s))", ascii_bytes(org_rev), ascii_bytes(other_rev))] # FIXME: expensive!

kallithea/model/async_tasks.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.model.async_tasks
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Kallithea task module, containing all tasks that are supposed to be run
 by the celery daemon
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Oct 6, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import email.message
 import email.utils
 import os
 import smtplib
 import time
 import traceback
 from collections import OrderedDict
 from operator import itemgetter
 from time import mktime
 import celery.utils.log
 from tg import config
 import kallithea
 import kallithea.lib.helpers as h
 from kallithea.lib import celerylib, conf, ext_json, hooks
 from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
 from kallithea.lib.utils2 import asbool, ascii_bytes
 from kallithea.lib.vcs.utils import author_email
 from kallithea.model import db, repo, userlog
 __all__ = ['whoosh_index', 'get_commits_stats', 'send_email']
 log = celery.utils.log.get_task_logger(__name__)
 @celerylib.task
 @celerylib.locked_task
 @celerylib.dbsession
 def whoosh_index(repo_location, full_index):
     """
     Run the whoosh indexing daemon for ``repo_location``, with the index
     stored in the configured ``index_dir``.

     :param repo_location: passed on to ``WhooshIndexingDaemon``
     :param full_index: passed on to the daemon's ``run``
     """
     celerylib.get_session() # initialize database connection
     daemon = WhooshIndexingDaemon(
         index_location=config['index_dir'],
         repo_location=repo_location,
     )
     daemon.run(full_index=full_index)
 # for js data compatibility: the person key must not contain double quotes
 def akc(k):
     """Return ``h.person(k)`` with all double quotes removed."""
     person = h.person(k)
     return person.replace('"', '')
 @celerylib.task
 @celerylib.dbsession
 def get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit=100):
     """
     Parse the next batch of changesets of a repository and store the
     updated commit statistics.

     Parsing continues after the revision recorded in the repository's
     existing ``Statistics`` row (if any) and covers at most
     ``commit_parse_limit`` (from config) changesets per invocation. When
     celery is enabled and ``recurse_limit`` allows it, the task invokes
     itself again for the next batch.

     :param repo_name: name of the repository to gather statistics for
     :param ts_min_y: lower timestamp bound for per-author data points
         (currently without effect, as ``skip_date_limit`` is always set)
     :param ts_max_y: upper timestamp bound (see ``ts_min_y``)
     :param recurse_limit: how many more times the task may invoke itself
     :returns: ``True`` if there was nothing to do, ``False`` if storing the
         statistics failed, a message str if the lock is already held, and
         ``None`` after a batch has been stored
     """
     DBS = celerylib.get_session()
     lockkey = celerylib.__get_lockkey('get_commits_stats', repo_name, ts_min_y,
                             ts_max_y)
     lockkey_path = config.get('cache_dir') or config['app_conf']['cache_dir']  # Backward compatibility for TurboGears < 2.4

     log.info('running task with lockkey %s', lockkey)

     try:
         lock = celerylib.DaemonLock(os.path.join(lockkey_path, lockkey))

         co_day_auth_aggr = {}
         commits_by_day_aggregate = {}
         db_repo = db.Repository.get_by_repo_name(repo_name)
         if db_repo is None:
             # release the lock here too, like on every other exit path
             lock.release()
             return True

         scm_repo = db_repo.scm_instance
         repo_size = scm_repo.count()
         # return if repo have no revisions
         if repo_size < 1:
             lock.release()
             return True

         skip_date_limit = True
         parse_limit = int(config.get('commit_parse_limit'))
         last_rev = None
         last_cs = None
         timegetter = itemgetter('time')

         dbrepo = DBS.query(db.Repository) \
             .filter(db.Repository.repo_name == repo_name).scalar()
         cur_stats = DBS.query(db.Statistics) \
             .filter(db.Statistics.repository == dbrepo).scalar()

         if cur_stats is not None:
             last_rev = cur_stats.stat_on_revision

         if last_rev == scm_repo.get_changeset().revision and repo_size > 1:
             # pass silently without any work if we're not on first revision or
             # current state of parsing revision(from db marker) is the
             # last revision
             lock.release()
             return True

         if cur_stats:
             commits_by_day_aggregate = OrderedDict(ext_json.loads(
                                         cur_stats.commit_activity_combined))
             co_day_auth_aggr = ext_json.loads(cur_stats.commit_activity)

         log.debug('starting parsing %s', parse_limit)

         # continue right after the last revision that was parsed
         last_rev = last_rev + 1 if last_rev and last_rev >= 0 else 0
         log.debug('Getting revisions from %s to %s',
              last_rev, last_rev + parse_limit
         )
         for cs in scm_repo[last_rev:last_rev + parse_limit]:
             log.debug('parsing %s', cs)
             last_cs = cs  # remember last parsed changeset
             tt = cs.date.timetuple()
             # timestamp of the day of the changeset - the key for aggregation
             k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))
             author = akc(cs.author)

             if author in co_day_auth_aggr:
                 author_data = co_day_auth_aggr[author]['data']
                 try:
                     l = [timegetter(x) for x in author_data]
                     time_pos = l.index(k)
                 except ValueError:
                     time_pos = None

                 if time_pos is not None and time_pos >= 0:
                     # the author already has an entry for this day
                     datadict = author_data[time_pos]
                     datadict["commits"] += 1
                     datadict["added"] += len(cs.added)
                     datadict["changed"] += len(cs.changed)
                     datadict["removed"] += len(cs.removed)
                 else:
                     if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                         datadict = {"time": k,
                                     "commits": 1,
                                     "added": len(cs.added),
                                     "changed": len(cs.changed),
                                     "removed": len(cs.removed),
                                    }
                         author_data.append(datadict)
             else:
                 if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                     co_day_auth_aggr[author] = {
                         "label": author,
                         "data": [{"time": k,
                                   "commits": 1,
                                   "added": len(cs.added),
                                   "changed": len(cs.changed),
                                   "removed": len(cs.removed),
                                   }],
                         "schema": ["commits"],
                     }

             # gather all data by day
             if k in commits_by_day_aggregate:
                 commits_by_day_aggregate[k] += 1
             else:
                 commits_by_day_aggregate[k] = 1

         overview_data = sorted(commits_by_day_aggregate.items(),
                                key=itemgetter(0))

         if not co_day_auth_aggr:
             # no author data at all - store a placeholder entry
             contact = akc(scm_repo.contact)
             co_day_auth_aggr[contact] = {
                 "label": contact,
                 "data": [0, 1],
                 "schema": ["commits"],
             }

         stats = cur_stats if cur_stats else db.Statistics()
         stats.commit_activity = ascii_bytes(ext_json.dumps(co_day_auth_aggr))
         stats.commit_activity_combined = ascii_bytes(ext_json.dumps(overview_data))

         log.debug('last revision %s', last_rev)
         leftovers = len(scm_repo.revisions[last_rev:])
         log.debug('revisions to parse %s', leftovers)

         if last_rev == 0 or leftovers < parse_limit:
             log.debug('getting code trending stats')
             stats.languages = ascii_bytes(ext_json.dumps(__get_codes_stats(repo_name)))

         try:
             stats.repository = dbrepo
             stats.stat_on_revision = last_cs.revision if last_cs else 0
             DBS.add(stats)
             DBS.commit()
         except Exception:
             # log and give up on this batch; exceptions that are not
             # ``Exception`` subclasses (e.g. SystemExit) are not swallowed
             log.error(traceback.format_exc())
             DBS.rollback()
             lock.release()
             return False

         # final release
         lock.release()

         # execute another task if celery is enabled
         if len(scm_repo.revisions) > 1 and kallithea.CELERY_APP and recurse_limit > 0:
             get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit - 1)
         elif recurse_limit <= 0:
             log.debug('Not recursing - limit has been reached')
         else:
             log.debug('Not recursing')
     except celerylib.LockHeld:
         log.info('Task with key %s already running', lockkey)
         return 'Task with key %s already running' % lockkey
 @celerylib.task
 @celerylib.dbsession
 def send_email(recipients, subject, body='', html_body='', headers=None, from_name=None):
     """
     Sends an email with defined parameters from the .ini files.

     :param recipients: list of recipients, if this is empty, all admins and
         the addresses from the 'email_to' setting are used instead
     :param subject: subject of the mail
     :param body: plain text body of the mail
     :param html_body: html version of body
     :param headers: dictionary of prepopulated e-mail headers
     :param from_name: full name to be used as sender of this mail - often a
     .full_name_or_username value
     :returns: ``True`` if the mail was handed over to the SMTP server,
         ``False`` if there are no recipients, no SMTP server is configured
         or sending failed
     """
     assert isinstance(recipients, list), recipients
     if headers is None:
         headers = {}
     else:
         # do not modify the original headers object passed by the caller
         headers = headers.copy()

     email_config = config
     email_prefix = email_config.get('email_prefix', '')
     if email_prefix:
         subject = "%s %s" % (email_prefix, subject)

     if not recipients:
         # if recipients are not defined we send to email_config + all admins
         recipients = [u.email for u in db.User.query()
                       .filter(db.User.admin == True).all()]
         if email_config.get('email_to') is not None:
             recipients += email_config.get('email_to').split(',')

         # If there are still no recipients, there are no admins and no address
         # configured in email_to, so return.
         if not recipients:
             log.error("No recipients specified and no fallback available.")
             return False

         log.warning("No recipients specified for '%s' - sending to admins %s", subject, ' '.join(recipients))

     # SMTP sender
     app_email_from = email_config.get('app_email_from', 'Kallithea')
     # 'From' header
     if from_name is not None:
         # set From header based on from_name but with a generic e-mail address
         # In case app_email_from is in "Some Name <e-mail>" format, we first
         # extract the e-mail address.
         envelope_addr = author_email(app_email_from)
         headers['From'] = '"%s" <%s>' % (
             email.utils.quote('%s (no-reply)' % from_name),
             envelope_addr)

     smtp_server = email_config.get('smtp_server')
     smtp_port = email_config.get('smtp_port')
     smtp_use_tls = asbool(email_config.get('smtp_use_tls'))
     smtp_use_ssl = asbool(email_config.get('smtp_use_ssl'))
     smtp_auth = email_config.get('smtp_auth')  # undocumented - overrule automatic choice of auth mechanism
     smtp_username = email_config.get('smtp_username')
     smtp_password = email_config.get('smtp_password')

     logmsg = ("Mail details:\n"
               "recipients: %s\n"
               "headers: %s\n"
               "subject: %s\n"
               "body:\n%s\n"
               "html:\n%s\n"
               % (' '.join(recipients), headers, subject, body, html_body))

     if smtp_server:
         log.debug("Sending e-mail. " + logmsg)
     else:
         log.error("SMTP mail server not configured - cannot send e-mail.")
         log.warning(logmsg)
         return False

     msg = email.message.EmailMessage()
     msg['Subject'] = subject
     msg['From'] = app_email_from  # fallback - might be overridden by a header
     msg['To'] = ', '.join(recipients)
     msg['Date'] = email.utils.formatdate(time.time())

     for key, value in headers.items():
         del msg[key]  # Delete key first to make sure add_header will replace header (if any), no matter the casing
         msg.add_header(key, value)

     msg.set_content(body)
     msg.add_alternative(html_body, subtype='html')

     try:
         if smtp_use_ssl:
             smtp_serv = smtplib.SMTP_SSL(smtp_server, smtp_port)
         else:
             smtp_serv = smtplib.SMTP(smtp_server, smtp_port)

         if smtp_use_tls:
             smtp_serv.starttls()

         if smtp_auth:
             smtp_serv.ehlo()  # populate esmtp_features
             smtp_serv.esmtp_features["auth"] = smtp_auth

         if smtp_username and smtp_password is not None:
             smtp_serv.login(smtp_username, smtp_password)

         smtp_serv.sendmail(app_email_from, recipients, msg.as_string())
         smtp_serv.quit()

         log.info('Mail was sent to: %s', recipients)
     except Exception:
         # report delivery problems as a failed send; exceptions that are not
         # ``Exception`` subclasses (e.g. KeyboardInterrupt) are not swallowed
         log.error('Mail sending failed')
         log.error(traceback.format_exc())
         return False
     return True
 @celerylib.task
 @celerylib.dbsession
 def create_repo(form_data, cur_user):
     DBS = celerylib.get_session()
     cur_user = db.User.guess_instance(cur_user)
     owner = cur_user
     repo_name = form_data['repo_name']
     repo_name_full = form_data['repo_name_full']
     repo_type = form_data['repo_type']
     description = form_data['repo_description']
     private = form_data['repo_private']
     clone_uri = form_data.get('clone_uri')
     repo_group = form_data['repo_group']
     landing_rev = form_data['repo_landing_rev']
     copy_fork_permissions = form_data.get('copy_permissions')
     copy_group_permissions = form_data.get('repo_copy_permissions')
     fork_of = form_data.get('fork_parent_id')
     state = form_data.get('repo_state', db.Repository.STATE_PENDING)
     # repo creation defaults, private and repo_type are filled in form
     defs = db.Setting.get_default_repo_settings(strip_prefix=True)
     enable_statistics = defs.get('repo_enable_statistics')
     enable_downloads = defs.get('repo_enable_downloads')
     try:
         db_repo = repo.RepoModel()._create_repo(
             repo_name=repo_name_full,
             repo_type=repo_type,
             description=description,
             owner=owner,
             private=private,
             clone_uri=clone_uri,
             repo_group=repo_group,
             landing_rev=landing_rev,
             fork_of=fork_of,

0 comments (0 inline, 0 general)