kallithea Changeset - e410c43aec42

Changeset - e410c43aec42

Parent rev.

Child rev.

[Not reviewed]

default

1 5 1

Mads Kiilerich (mads) - 5 years ago 2020-10-13 19:07:59
mads@kiilerich.com

Grafted from: 42a94a6796fb

config: move config.conf to kallithea.lib

kallithea.config is mainly the WSGI entry point with TG stuff, and thus a high
level controller thing - not a place to store configuration.

Note: The content of conf.py is kind of similar to what we have in
kallithea/__init__.py. These two should perhaps be merged somehow.

6 files changed with 11 insertions and 12 deletions:

kallithea/controllers/summary.py

kallithea/lib/celerylib/tasks.py

kallithea/lib/conf.py

kallithea/lib/indexers/daemon.py

kallithea/lib/utils.py

kallithea/tests/functional/test_search_indexing.py

0 comments (0 inline, 0 general)

kallithea/controllers/summary.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.controllers.summary
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Summary controller for Kallithea
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Apr 18, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import calendar
 import itertools
 import logging
 import traceback
 from datetime import date, timedelta
 from time import mktime
 from beaker.cache import cache_region
 from tg import request
 from tg import tmpl_context as c
 from tg.i18n import ugettext as _
 from webob.exc import HTTPBadRequest
 import kallithea.lib.helpers as h
-from kallithea.config.conf import ALL_EXTS, ALL_READMES, LANGUAGES_EXTENSIONS_MAP
 from kallithea.lib import ext_json
 from kallithea.lib.auth import HasRepoPermissionLevelDecorator, LoginRequired
 from kallithea.lib.base import BaseRepoController, jsonify, render
 from kallithea.lib.celerylib.tasks import get_commits_stats
+from kallithea.lib.conf import ALL_EXTS, ALL_READMES, LANGUAGES_EXTENSIONS_MAP
 from kallithea.lib.markup_renderer import MarkupRenderer
 from kallithea.lib.page import Page
 from kallithea.lib.utils2 import safe_int, safe_str
 from kallithea.lib.vcs.backends.base import EmptyChangeset
 from kallithea.lib.vcs.exceptions import ChangesetError, EmptyRepositoryError, NodeDoesNotExistError
 from kallithea.lib.vcs.nodes import FileNode
 from kallithea.model.db import Statistics
 log = logging.getLogger(__name__)
 # Candidate README paths: every README base name combined with every known
 # extension, ordered by the sum of both weights (lower is tried first).
 README_FILES = [
     readme_name + ext_name
     for (readme_name, _readme_weight), (ext_name, _ext_weight) in sorted(
         itertools.product(ALL_READMES, ALL_EXTS),
         key=lambda pair: pair[0][1] + pair[1][1])
 ]
 class SummaryController(BaseRepoController):
     """
     Controller behind the repository summary and statistics pages.
     It renders 'summary/summary.html' and 'summary/statistics.html' and
     answers the XHR-only repository size request.
     """
     def __get_readme_data(self, db_repo):
         """
         Locate and render the README of a repository.
         The first entry of README_FILES that exists as a file in the landing
         changeset is rendered with MarkupRenderer. The lookup is wrapped in
         the 'long_term_file' cache region.
         :param db_repo: database repository object; its ``repo_name`` and
             ``get_landing_changeset()`` are used
         :return: tuple ``(readme_data, readme_file)``; both are None when no
             README was found, the repository is empty or the changeset
             lookup failed
         """
         repo_name = db_repo.repo_name
         log.debug('Looking for README file')
         @cache_region('long_term_file', '_get_readme_from_cache')
         def _get_readme_from_cache(*_cache_keys):  # parameters are not really used - only as caching key
             """Do the actual lookup; the arguments only form the cache key."""
             readme_data = None
             readme_file = None
             try:
                 # gets the landing revision! or tip if fails
                 cs = db_repo.get_landing_changeset()
                 if isinstance(cs, EmptyChangeset):
                     raise EmptyRepositoryError()
                 renderer = MarkupRenderer()
                 # README_FILES is ordered by weight, so the first hit wins
                 for f in README_FILES:
                     try:
                         readme = cs.get_node(f)
                         # skip nodes that exist but are not plain files
                         if not isinstance(readme, FileNode):
                             continue
                         readme_file = f
                         log.debug('Found README file `%s` rendering...',
                                   readme_file)
                         readme_data = renderer.render(safe_str(readme.content),
                                                       filename=f)
                         break
                     except NodeDoesNotExistError:
                         continue
             except ChangesetError:
                 # logged, but the page is still rendered without a README
                 log.error(traceback.format_exc())
                 pass
             except EmptyRepositoryError:
                 # an empty repository simply has no README
                 pass
             return readme_data, readme_file
         kind = 'README'
         # The cache key is built from the repository name, the kind and the
         # cached changeset raw_id - presumably so that a new changeset leads
         # to a fresh lookup.
         # NOTE(review): the raw_id is read from c.db_repo, not from the
         # db_repo argument - confirm both always refer to the same repository.
         return _get_readme_from_cache(repo_name, kind, c.db_repo.changeset_cache.get('raw_id'))
     @LoginRequired(allow_default_user=True)
     @HasRepoPermissionLevelDecorator('read')
     def index(self, repo_name):
         """
         Render the repository summary page.
         Fills the template context with the paginated changesets, their
         comments and statuses, the clone URLs, the language statistics and
         the rendered README.
         :param repo_name: repository name from the route; not read in the
             body, which works on ``c.db_repo``
         :return: the rendered 'summary/summary.html' template
         """
         # paging comes from the query string: 'page' defaults to 1, 'size' to 10
         p = safe_int(request.GET.get('page'), 1)
         size = safe_int(request.GET.get('size'), 10)
         try:
             collection = c.db_repo_scm_instance.get_changesets(reverse=True)
         except EmptyRepositoryError as e:
             # an empty repository is shown with a warning and no changesets
             h.flash(e, category='warning')
             collection = []
         c.cs_pagination = Page(collection, page=p, items_per_page=size)
         page_revisions = [x.raw_id for x in list(c.cs_pagination)]
         c.cs_comments = c.db_repo.get_comments(page_revisions)
         c.cs_statuses = c.db_repo.statuses(page_revisions)
         c.ssh_repo_url = None
         if request.authuser.is_default_user:
             # the default user gets clone URLs without a username and no SSH URL
             username = None
         else:
             username = request.authuser.username
             if c.ssh_enabled:
                 c.ssh_repo_url = c.db_repo.clone_url(clone_uri_tmpl=c.clone_ssh_tmpl)
         c.clone_repo_url = c.db_repo.clone_url(clone_uri_tmpl=c.clone_uri_tmpl, with_id=False, username=username)
         c.clone_repo_url_id = c.db_repo.clone_url(clone_uri_tmpl=c.clone_uri_tmpl, with_id=True, username=username)
         if c.db_repo.enable_statistics:
             c.show_stats = True
         else:
             c.show_stats = False
         stats = Statistics.query() \
             .filter(Statistics.repository == c.db_repo) \
             .scalar()
         c.stats_percentage = 0
         if stats and stats.languages:
             # stats.languages is JSON; its items are used as (key, count) pairs
             lang_stats_d = ext_json.loads(stats.languages)
             lang_stats = [(x, {"count": y,
                                "desc": LANGUAGES_EXTENSIONS_MAP.get(x, '?')})
                           for x, y in lang_stats_d.items()]
             # highest count first, ties broken by key; only the top 10 are shown
             lang_stats.sort(key=lambda k: (-k[1]['count'], k[0]))
             c.trending_languages = lang_stats[:10]
         else:
             c.trending_languages = []
         c.enable_downloads = c.db_repo.enable_downloads
         c.readme_data, c.readme_file = \
             self.__get_readme_data(c.db_repo)
         return render('summary/summary.html')
     @LoginRequired()
     @HasRepoPermissionLevelDecorator('read')
     @jsonify
     def repo_size(self, repo_name):
         """
         Return the size information of the current repository.
         :param repo_name: repository name from the route; not read in the
             body, which works on ``c.db_repo``
         :return: the result of ``c.db_repo._repo_size()``, passed through the
             ``jsonify`` decorator
         :raises HTTPBadRequest: when the request is not an XHR request
         """
         if request.is_xhr:
             return c.db_repo._repo_size()
         else:
             raise HTTPBadRequest()
     @LoginRequired(allow_default_user=True)
     @HasRepoPermissionLevelDecorator('read')
     def statistics(self, repo_name):
         """
         Render the repository statistics page.
         Loads the stored commit activity and language statistics into the
         template context and then calls ``get_commits_stats`` for the
         repository.
         :param repo_name: repository name from the route; not read in the
             body, which works on ``c.db_repo``
         :return: the rendered 'summary/statistics.html' template
         """
         if c.db_repo.enable_statistics:
             c.show_stats = True
             c.no_data_msg = _('No data ready yet')
         else:
             c.show_stats = False
             c.no_data_msg = _('Statistics are disabled for this repository')
         # td is tomorrow; td_1m lies as many days before td as td's month
         # has days, td_1y lies 365 days before td
         td = date.today() + timedelta(days=1)
         td_1m = td - timedelta(days=calendar.monthrange(td.year, td.month)[1])
         td_1y = td - timedelta(days=365)
         ts_min_m = mktime(td_1m.timetuple())
         ts_min_y = mktime(td_1y.timetuple())
         ts_max_y = mktime(td.timetuple())
         # the template window spans the last month; the yearly bounds are
         # passed to get_commits_stats below
         c.ts_min = ts_min_m
         c.ts_max = ts_max_y
         stats = Statistics.query() \
             .filter(Statistics.repository == c.db_repo) \
             .scalar()
         c.stats_percentage = 0
         if stats and stats.languages:
             c.commit_data = ext_json.loads(stats.commit_activity)
             c.overview_data = ext_json.loads(stats.commit_activity_combined)
             lang_stats_d = ext_json.loads(stats.languages)
             lang_stats = [(x, {"count": y,
                                "desc": LANGUAGES_EXTENSIONS_MAP.get(x, '?')})
                           for x, y in lang_stats_d.items()]
             # highest count first, ties broken by key; only the top 10 are shown
             lang_stats.sort(key=lambda k: (-k[1]['count'], k[0]))
             c.trending_languages = lang_stats[:10]
             # stat_on_revision is the last parsed revision; + 1 turns it into
             # a number of parsed revisions for the percentage below
             last_rev = stats.stat_on_revision + 1
             c.repo_last_rev = c.db_repo_scm_instance.count() \
                 if c.db_repo_scm_instance.revisions else 0
             if last_rev == 0 or c.repo_last_rev == 0:
                 # keep the default of 0 and avoid dividing by zero
                 pass
             else:
                 c.stats_percentage = '%.2f' % ((float((last_rev)) /
                                                 c.repo_last_rev) * 100)
         else:
             # placeholder data when no statistics have been stored yet
             c.commit_data = {}
             c.overview_data = ([[ts_min_y, 0], [ts_max_y, 10]])
             c.trending_languages = []
         recurse_limit = 500  # don't recurse more than 500 times when parsing
         get_commits_stats(c.db_repo.repo_name, ts_min_y, ts_max_y, recurse_limit)
         return render('summary/statistics.html')

kallithea/lib/celerylib/tasks.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.celerylib.tasks
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Kallithea task modules, containing all tasks that are supposed to be run
 by the celery daemon
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Oct 6, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import email.utils
 import os
 import traceback
 from collections import OrderedDict
 from operator import itemgetter
 from time import mktime
 import celery.utils.log
 from tg import config
 import kallithea
-from kallithea.config import conf
-from kallithea.lib import celerylib, ext_json
+from kallithea.lib import celerylib, conf, ext_json
 from kallithea.lib.helpers import person
 from kallithea.lib.hooks import log_create_repository
 from kallithea.lib.indexers.daemon import WhooshIndexingDaemon
 from kallithea.lib.rcmail.smtp_mailer import SmtpMailer
 from kallithea.lib.utils import action_logger
 from kallithea.lib.utils2 import asbool, ascii_bytes
 from kallithea.lib.vcs.utils import author_email
 from kallithea.model.db import RepoGroup, Repository, Setting, Statistics, User
 from kallithea.model.repo import RepoModel
 # Names exported by a star-import of this module.
 # NOTE(review): create_repo and create_repo_fork are defined in this module
 # as well but are not listed here - confirm that this is intentional.
 __all__ = ['whoosh_index', 'get_commits_stats', 'send_email']
 # Task logger obtained through celery's logging helper.
 log = celery.utils.log.get_task_logger(__name__)
 @celerylib.task
 @celerylib.locked_task
 @celerylib.dbsession
 def whoosh_index(repo_location, full_index):
     """
     Run the whoosh indexing daemon once.
     The index is stored in the directory configured as 'index_dir'.
     :param repo_location: location of the repositories to index
     :param full_index: passed on to ``WhooshIndexingDaemon.run``
     """
     celerylib.get_session()  # initialize database connection
     daemon = WhooshIndexingDaemon(
         index_location=config['index_dir'],
         repo_location=repo_location,
     )
     daemon.run(full_index=full_index)
 def akc(k):
     """
     Return the person name for ``k`` with all double quotes removed.
     The result is used as a key in data handed to JavaScript, which is why
     the quotes are stripped.
     """
     name = person(k)
     return name.replace('"', '')
 @celerylib.task
 @celerylib.dbsession
 def get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit=100):
     """
     Incrementally collect commit statistics for a repository and store them
     in its Statistics row.
     At most 'commit_parse_limit' (from the config) changesets are parsed per
     call, normally starting after the revision stored in
     ``stat_on_revision``. The work is guarded by a DaemonLock whose key is
     built from the function name and the arguments below.
     :param repo_name: name of the repository to gather statistics for
     :param ts_min_y: lower timestamp bound; part of the lock key, and used
         in date checks that are always true while ``skip_date_limit`` is True
     :param ts_max_y: upper timestamp bound, used like ``ts_min_y``
     :param recurse_limit: how many more times the function may call itself
         to continue parsing
     :return: True when there is nothing to do, False when storing the
         statistics failed, a message string when the lock is already held
         and None after a normal run
     """
     DBS = celerylib.get_session()
     lockkey = celerylib.__get_lockkey('get_commits_stats', repo_name, ts_min_y,
                             ts_max_y)
     lockkey_path = config.get('cache_dir') or config['app_conf']['cache_dir']  # Backward compatibility for TurboGears < 2.4
     log.info('running task with lockkey %s', lockkey)
     try:
         lock = celerylib.DaemonLock(os.path.join(lockkey_path, lockkey))
         # per author statistics, keyed by the cleaned author name
         co_day_auth_aggr = {}
         # number of commits per day, keyed by the day timestamp
         commits_by_day_aggregate = {}
         repo = Repository.get_by_repo_name(repo_name)
         if repo is None:
             # NOTE(review): this path returns without calling lock.release(),
             # unlike the other early returns - confirm whether DaemonLock
             # cleans up on its own.
             return True
         repo = repo.scm_instance
         repo_size = repo.count()
         # return if repo have no revisions
         if repo_size < 1:
             lock.release()
             return True
         # while this is True the ts_min_y/ts_max_y checks below always pass
         skip_date_limit = True
         # NOTE(review): int() raises if 'commit_parse_limit' is not
         # configured - confirm that the setting is always present.
         parse_limit = int(config.get('commit_parse_limit'))
         last_rev = None
         last_cs = None
         timegetter = itemgetter('time')
         dbrepo = DBS.query(Repository) \
             .filter(Repository.repo_name == repo_name).scalar()
         cur_stats = DBS.query(Statistics) \
             .filter(Statistics.repository == dbrepo).scalar()
         if cur_stats is not None:
             last_rev = cur_stats.stat_on_revision
         if last_rev == repo.get_changeset().revision and repo_size > 1:
             # pass silently without any work if we're not on first revision or
             # current state of parsing revision(from db marker) is the
             # last revision
             lock.release()
             return True
         if cur_stats:
             # continue from the previously stored aggregates
             commits_by_day_aggregate = OrderedDict(ext_json.loads(
                                         cur_stats.commit_activity_combined))
             co_day_auth_aggr = ext_json.loads(cur_stats.commit_activity)
         log.debug('starting parsing %s', parse_limit)
         # NOTE(review): a stored last_rev of 0 is falsy, so parsing starts at
         # revision 0 again in that case, just as when nothing is stored -
         # confirm that revision 0 cannot be counted twice.
         last_rev = last_rev + 1 if last_rev and last_rev >= 0 else 0
         log.debug('Getting revisions from %s to %s',
              last_rev, last_rev + parse_limit
+        )
         for cs in repo[last_rev:last_rev + parse_limit]:
             log.debug('parsing %s', cs)
             last_cs = cs  # remember last parsed changeset
             # k is the timestamp of the commit date with the time of day
             # set to zero, i.e. one key per day
             tt = cs.date.timetuple()
             k = mktime(tt[:3] + (0, 0, 0, 0, 0, 0))
             if akc(cs.author) in co_day_auth_aggr:
                 # known author: look for an existing entry for this day
                 try:
                     l = [timegetter(x) for x in
                          co_day_auth_aggr[akc(cs.author)]['data']]
                     time_pos = l.index(k)
                 except ValueError:
                     time_pos = None
                 if time_pos is not None and time_pos >= 0:
                     # the day already has an entry: add to its counters
                     datadict = \
                         co_day_auth_aggr[akc(cs.author)]['data'][time_pos]
                     datadict["commits"] += 1
                     datadict["added"] += len(cs.added)
                     datadict["changed"] += len(cs.changed)
                     datadict["removed"] += len(cs.removed)
                 else:
                     # first commit of this author on this day
                     if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                         datadict = {"time": k,
                                     "commits": 1,
                                     "added": len(cs.added),
                                     "changed": len(cs.changed),
                                     "removed": len(cs.removed),
+                                   }
                         co_day_auth_aggr[akc(cs.author)]['data'] \
                             .append(datadict)
             else:
                 # first commit seen from this author
                 if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                     co_day_auth_aggr[akc(cs.author)] = {
                                         "label": akc(cs.author),
                                         "data": [{"time": k,
                                                  "commits": 1,
                                                  "added": len(cs.added),
                                                  "changed": len(cs.changed),
                                                  "removed": len(cs.removed),
                                                  }],
                                         "schema": ["commits"],
+                                        }
             # gather all data by day
             if k in commits_by_day_aggregate:
                 commits_by_day_aggregate[k] += 1
             else:
                 commits_by_day_aggregate[k] = 1
         # (day, commit count) pairs ordered by day
         overview_data = sorted(commits_by_day_aggregate.items(),
                                key=itemgetter(0))
         if not co_day_auth_aggr:
             # nothing was collected: store a placeholder entry for the
             # repository contact
             co_day_auth_aggr[akc(repo.contact)] = {
                 "label": akc(repo.contact),
                 "data": [0, 1],
                 "schema": ["commits"],
+            }
         stats = cur_stats if cur_stats else Statistics()
         stats.commit_activity = ascii_bytes(ext_json.dumps(co_day_auth_aggr))
         stats.commit_activity_combined = ascii_bytes(ext_json.dumps(overview_data))
         log.debug('last revision %s', last_rev)
         leftovers = len(repo.revisions[last_rev:])
         log.debug('revisions to parse %s', leftovers)
         # language statistics are only computed on a run that started at
         # revision 0 or that has fewer than parse_limit revisions left
         if last_rev == 0 or leftovers < parse_limit:
             log.debug('getting code trending stats')
             stats.languages = ascii_bytes(ext_json.dumps(__get_codes_stats(repo_name)))
         try:
             stats.repository = dbrepo
             stats.stat_on_revision = last_cs.revision if last_cs else 0
             DBS.add(stats)
             DBS.commit()
         except:
             # any failure while storing is logged and reported as False
             log.error(traceback.format_exc())
             DBS.rollback()
             lock.release()
             return False
         # final release
         lock.release()
         # execute another task if celery is enabled
         # (the call happens after the lock has been released above)
         if len(repo.revisions) > 1 and kallithea.CELERY_APP and recurse_limit > 0:
             get_commits_stats(repo_name, ts_min_y, ts_max_y, recurse_limit - 1)
         elif recurse_limit <= 0:
             log.debug('Not recursing - limit has been reached')
         else:
             log.debug('Not recursing')
     except celerylib.LockHeld:
         log.info('Task with key %s already running', lockkey)
         return 'Task with key %s already running' % lockkey
 @celerylib.task
 @celerylib.dbsession
 def send_email(recipients, subject, body='', html_body='', headers=None, from_name=None):
     """
     Sends an email with defined parameters from the .ini files.
     :param recipients: list of recipients; if the list is empty, all admins
         and the addresses from the 'email_to' setting are used instead
     :param subject: subject of the mail
     :param body: body of the mail
     :param html_body: html version of body
     :param headers: dictionary of prepopulated e-mail headers
     :param from_name: full name to be used as sender of this mail - often a
     .full_name_or_username value
     :return: True when the mail was passed to the SMTP mailer without an
         error; False when there are no recipients, no SMTP server is
         configured or sending failed
     """
     assert isinstance(recipients, list), recipients
     if headers is None:
         headers = {}
     else:
         # do not modify the original headers object passed by the caller
         headers = headers.copy()
     email_config = config
     email_prefix = email_config.get('email_prefix', '')
     if email_prefix:
         subject = "%s %s" % (email_prefix, subject)
     if not recipients:
         # if recipients are not defined we send to email_config + all admins
         recipients = [u.email for u in User.query()
                       .filter(User.admin == True).all()]
         email_to = email_config.get('email_to')
         if email_to is not None:
             # Drop surrounding whitespace and empty entries, so that a value
             # like "a@example.com, b@example.com" or an empty setting does
             # not produce bogus recipient addresses.
             recipients += [addr.strip() for addr in email_to.split(',')
                            if addr.strip()]
         # If there are still no recipients, there are no admins and no address
         # configured in email_to, so return.
         if not recipients:
             log.error("No recipients specified and no fallback available.")
             return False
         log.warning("No recipients specified for '%s' - sending to admins %s", subject, ' '.join(recipients))
     # SMTP sender
     envelope_from = email_config.get('app_email_from', 'Kallithea')
     # 'From' header
     if from_name is not None:
         # set From header based on from_name but with a generic e-mail address
         # In case app_email_from is in "Some Name <e-mail>" format, we first
         # extract the e-mail address.
         envelope_addr = author_email(envelope_from)
         headers['From'] = '"%s" <%s>' % (
             email.utils.quote('%s (no-reply)' % from_name),
             envelope_addr)
     user = email_config.get('smtp_username')
     passwd = email_config.get('smtp_password')
     mail_server = email_config.get('smtp_server')
     mail_port = email_config.get('smtp_port')
     tls = asbool(email_config.get('smtp_use_tls'))
     ssl = asbool(email_config.get('smtp_use_ssl'))
     debug = asbool(email_config.get('debug'))
     smtp_auth = email_config.get('smtp_auth')
     logmsg = ("Mail details:\n"
               "recipients: %s\n"
               "headers: %s\n"
               "subject: %s\n"
               "body:\n%s\n"
               "html:\n%s\n"
               % (' '.join(recipients), headers, subject, body, html_body))
     if mail_server:
         log.debug("Sending e-mail. " + logmsg)
     else:
         log.error("SMTP mail server not configured - cannot send e-mail.")
         log.warning(logmsg)
         return False
     try:
         m = SmtpMailer(envelope_from, user, passwd, mail_server, smtp_auth,
                        mail_port, ssl, tls, debug=debug)
         m.send(recipients, subject, body, html_body, headers=headers)
     except Exception:
         # Only ordinary errors are turned into a False result; SystemExit
         # and KeyboardInterrupt are deliberately allowed to propagate.
         log.error('Mail sending failed')
         log.error(traceback.format_exc())
         return False
     return True
 @celerylib.task
 @celerylib.dbsession
 def create_repo(form_data, cur_user):
     """
     Create a repository in the database and on the filesystem.
     If anything fails, the database entry and the filesystem repository are
     removed again (when the database entry can be found) and the exception
     is re-raised.
     :param form_data: dictionary with the repository settings; the keys
         'repo_name', 'repo_name_full', 'repo_type', 'repo_description',
         'repo_private', 'repo_group' and 'repo_landing_rev' are required,
         the other keys read below are optional
     :param cur_user: user creating the repository, resolved through
         ``User.guess_instance``; becomes the owner
     :return: True on success
     """
     DBS = celerylib.get_session()
     cur_user = User.guess_instance(cur_user)
     owner = cur_user
     repo_name = form_data['repo_name']
     repo_name_full = form_data['repo_name_full']
     repo_type = form_data['repo_type']
     description = form_data['repo_description']
     private = form_data['repo_private']
     clone_uri = form_data.get('clone_uri')
     repo_group = form_data['repo_group']
     landing_rev = form_data['repo_landing_rev']
     copy_fork_permissions = form_data.get('copy_permissions')
     copy_group_permissions = form_data.get('repo_copy_permissions')
     fork_of = form_data.get('fork_parent_id')
     state = form_data.get('repo_state', Repository.STATE_PENDING)
     # repo creation defaults, private and repo_type are filled in form
     defs = Setting.get_default_repo_settings(strip_prefix=True)
     enable_statistics = defs.get('repo_enable_statistics')
     enable_downloads = defs.get('repo_enable_downloads')
     try:
         # the created object is not needed here; the repository is looked
         # up again by name once the filesystem repo exists
         RepoModel()._create_repo(
             repo_name=repo_name_full,
             repo_type=repo_type,
             description=description,
             owner=owner,
             private=private,
             clone_uri=clone_uri,
             repo_group=repo_group,
             landing_rev=landing_rev,
             fork_of=fork_of,
             copy_fork_permissions=copy_fork_permissions,
             copy_group_permissions=copy_group_permissions,
             enable_statistics=enable_statistics,
             enable_downloads=enable_downloads,
             state=state
         )
         action_logger(cur_user, 'user_created_repo',
                       form_data['repo_name_full'], '')
         DBS.commit()
         # now create this repo on Filesystem
         RepoModel()._create_filesystem_repo(
             repo_name=repo_name,
             repo_type=repo_type,
             repo_group=RepoGroup.guess_instance(repo_group),
             clone_uri=clone_uri,
         )
         repo = Repository.get_by_repo_name(repo_name_full)
         log_create_repository(repo.get_dict(), created_by=owner.username)
         # update repo changeset caches initially
         repo.update_changeset_cache()
         # set new created state
         repo.set_state(Repository.STATE_CREATED)
         DBS.commit()
     except Exception as e:
         # this task creates a repository; it does not fork one
         log.warning('Exception %s occurred when creating repository, '
                     'doing cleanup...', e)
         # rollback things manually !
         repo = Repository.get_by_repo_name(repo_name_full)
         if repo:
             Repository.delete(repo.repo_id)
             DBS.commit()
             RepoModel()._delete_filesystem_repo(repo)
         raise
     return True
 @celerylib.task
 @celerylib.dbsession
 def create_repo_fork(form_data, cur_user):
     """
     Creates a fork of repository using interval VCS methods
     :param form_data:
     :param cur_user:
     """
     DBS = celerylib.get_session()
     base_path = kallithea.CONFIG['base_path']
     cur_user = User.guess_instance(cur_user)
     repo_name = form_data['repo_name']  # fork in this case
     repo_name_full = form_data['repo_name_full']
     repo_type = form_data['repo_type']
     owner = cur_user
     private = form_data['private']
     clone_uri = form_data.get('clone_uri')
     repo_group = form_data['repo_group']
     landing_rev = form_data['landing_rev']

kallithea/lib/conf.py

➞

Show inline comments

@@ file renamed from kallithea/config/conf.py to kallithea/lib/conf.py @@
 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
-kallithea.config.conf
+kallithea.lib.conf
 ~~~~~~~~~~~~~~~~~~~~~
 Various config settings for Kallithea
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Mar 7, 2012
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 from kallithea.lib import pygmentsutils
 # The language map is also used by the whoosh indexer, which indexes the
 # content of files with the extensions listed in it
 LANGUAGES_EXTENSIONS_MAP = pygmentsutils.get_extension_descriptions()
 # Whoosh index targets
 # Extensions of the files whose content we want to index using whoosh
 INDEX_EXTENSIONS = list(LANGUAGES_EXTENSIONS_MAP)
 # Filenames of the files whose content we want to index using whoosh
 INDEX_FILENAMES = pygmentsutils.get_index_filenames()
 # List of readme files to search for in the file tree and display in the
 # summary. The attached weights define the search order: lower is first
 ALL_READMES = [
     ('readme', 0), ('README', 0), ('Readme', 0),
     ('doc/readme', 1), ('doc/README', 1), ('doc/Readme', 1),
     ('Docs/readme', 2), ('Docs/README', 2), ('Docs/Readme', 2),
     ('DOCS/readme', 2), ('DOCS/README', 2), ('DOCS/Readme', 2),
     ('docs/readme', 2), ('docs/README', 2), ('docs/Readme', 2),
+]
 # Extensions together with their weights: lower is searched first
 RST_EXTS = [
     ('', 0), ('.rst', 1), ('.rest', 1),
     ('.RST', 2), ('.REST', 2),
     ('.txt', 3), ('.TXT', 3)
+]
 MARKDOWN_EXTS = [
     ('.md', 1), ('.MD', 1),
     ('.mkdn', 2), ('.MKDN', 2),
     ('.mdown', 3), ('.MDOWN', 3),
     ('.markdown', 4), ('.MARKDOWN', 4)
+]
 PLAIN_EXTS = [('.text', 2), ('.TEXT', 2)]
 # All README extensions: markdown first, then reStructuredText, then plain text
 ALL_EXTS = MARKDOWN_EXTS + RST_EXTS + PLAIN_EXTS

kallithea/lib/indexers/daemon.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
+#
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.indexers.daemon
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 A daemon will read from task table and run tasks
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Jan 26, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import logging
 import os
 import sys
 import traceback
 from os.path import dirname
 from shutil import rmtree
 from time import mktime
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.qparser import QueryParser
-from kallithea.config.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
+from kallithea.lib.conf import INDEX_EXTENSIONS, INDEX_FILENAMES
 from kallithea.lib.indexers import CHGSET_IDX_NAME, CHGSETS_SCHEMA, IDX_NAME, SCHEMA
 from kallithea.lib.utils2 import safe_str
 from kallithea.lib.vcs.exceptions import ChangesetDoesNotExistError, ChangesetError, NodeDoesNotExistError, RepositoryError
 from kallithea.model.db import Repository
 from kallithea.model.scm import ScmModel
 # Add location of top level folder to sys.path
 # (four directory levels above the real path of this file)
 project_path = dirname(dirname(dirname(dirname(os.path.realpath(__file__)))))
 sys.path.append(project_path)
 # This module logs under the fixed name 'whoosh_indexer'
 log = logging.getLogger('whoosh_indexer')
 class WhooshIndexingDaemon(object):
     """
     Daemon for atomic indexing jobs
     """
     def __init__(self, indexname=IDX_NAME, index_location=None,
                  repo_location=None, repo_list=None,
                  repo_update_list=None):
         """
         Scan the repositories and decide whether a full index build is needed.
         :param indexname: name stored as ``self.indexname``
         :param index_location: directory of the index; required
         :param repo_location: location that is scanned for repositories;
             required
         :param repo_list: optional names; when given, only these
             repositories are kept
         :param repo_update_list: optional names; when given, the remaining
             repositories are narrowed down to these
         :raises Exception: when index_location or repo_location is missing
         """
         self.indexname = indexname
         self.index_location = index_location
         if not index_location:
             raise Exception('You have to provide index location')
         self.repo_location = repo_location
         if not repo_location:
             raise Exception('You have to provide repositories location')
         self.repo_paths = ScmModel().repo_scan(self.repo_location)
         # filter repo list
         if repo_list:
             wanted = set(repo_list)
             self.filtered_repo_paths = {
                 name: found
                 for name, found in self.repo_paths.items()
                 if name in wanted
             }
             self.repo_paths = self.filtered_repo_paths
         # filter update repo list
         self.filtered_repo_update_paths = {}
         if repo_update_list:
             self.filtered_repo_update_paths = {
                 name: found
                 for name, found in self.repo_paths.items()
                 if name in repo_update_list
             }
             self.repo_paths = self.filtered_repo_update_paths
         # a full build is needed unless the index directory and both
         # indexes already exist
         self.initial = True
         if not os.path.isdir(self.index_location):
             os.makedirs(self.index_location)
             log.info('Cannot run incremental index since it does not '
                      'yet exist - running full build')
         elif not exists_in(self.index_location, IDX_NAME):
             log.info('Running full index build, as the file content '
                      'index does not exist')
         elif not exists_in(self.index_location, CHGSET_IDX_NAME):
             log.info('Running full index build, as the changeset '
                      'index does not exist')
         else:
             self.initial = False
     def _get_index_revision(self, repo):
         """
         Return the revision to index for ``repo``: the revision part of the
         landing_rev stored in the database, or 'tip' when the repo has no
         database entry.
         """
         db_repo = Repository.get_by_repo_name(repo.name)
         if not db_repo:
             return 'tip'
         _rev_type, landing_rev = db_repo.landing_rev
         return landing_rev
     def _get_index_changeset(self, repo, index_rev=None):
         """
         Return the changeset of ``repo`` at ``index_rev``; when no revision
         is given, the repo's index revision is looked up first.
         """
         rev = index_rev or self._get_index_revision(repo)
         return repo.get_changeset(rev)
     def get_paths(self, repo):
         """
         Walk the index changeset of ``repo`` from its root and return the set
         of all file paths found, each prefixed with ``repo.path``.

         A RepositoryError during the walk is logged at debug level and
         whatever was collected so far is returned.
         """
         collected = set()
         try:
             changeset = self._get_index_changeset(repo)
             for _topnode, _dirs, file_nodes in changeset.walk('/'):
                 collected.update(
                     os.path.join(repo.path, file_node.path)
                     for file_node in file_nodes
                 )
         except RepositoryError:
             log.debug(traceback.format_exc())
         return collected
     def get_node(self, repo, path, index_rev=None):
         """
         Return the node for a full path inside ``repo``.

         :param repo: scm repo instance
         :param path: full path including root location (``repo.path``)
         :param index_rev: revision to look the node up in; the repo's index
             revision is used when not given
         :return: the node returned by the changeset for the relative path
         """
         # FIXME: paths should be normalized ... or even better: don't include repo.path
         root_len = len(repo.path)
         assert path.startswith(repo.path)
         assert path[root_len] in (os.path.sep, os.path.altsep)
         changeset = self._get_index_changeset(repo, index_rev=index_rev)
         # strip the repo root and the separator following it
         return changeset.get_node(path[root_len + 1:])
     def is_indexable_node(self, node):
         """
         Tell whether the content of ``node`` should be indexed: its extension
         or name must be listed for indexing and it must not be binary.
         """
         listed = node.extension in INDEX_EXTENSIONS or node.name in INDEX_FILENAMES
         if not listed:
             return False
         return not node.is_binary
     def get_node_mtime(self, node):
         """Return the date of the node's last changeset as a mktime() timestamp."""
         last_changed = node.last_changeset.date
         return mktime(last_changed.timetuple())
     def add_doc(self, writer, path, repo, repo_name, index_rev=None):
         """
         Add one file document to ``writer``, fetching the data from the vcs
         backend.

         :param writer: index writer to add the document to
         :param path: full path of the file, including ``repo.path``
         :param repo: vcs repo instance
         :param repo_name: name stored in the document's repository fields
         :param index_rev: revision to read the node from
         :return: tuple ``(indexed, indexed_w_content)``; ``(0, 0)`` when the
             node cannot be found. A listed file whose bytes contain NUL is
             still added (with empty content) but counted in neither value.
         """
         try:
             node = self.get_node(repo, path, index_rev)
         except (ChangesetError, NodeDoesNotExistError):
             log.debug("    >> %s - not found in %s %s", path, repo, index_rev)
             return 0, 0

         indexed = indexed_w_content = 0
         if self.is_indexable_node(node):
             bytes_content = node.content
             if b'\0' in bytes_content:
                 # NUL bytes: treat as non-text even though the name is listed
                 log.warning('    >> %s - no text content', path)
                 u_content = ''
             else:
                 log.debug('    >> %s', path)
                 u_content = safe_str(bytes_content)
                 indexed_w_content += 1
         else:
             log.debug('    >> %s - not indexable', path)
             # just index the file name without its content
             u_content = ''
             indexed += 1

         writer.add_document(
             fileid=path,
             owner=repo.contact,
             repository_rawname=repo_name,
             repository=repo_name,
             path=path,
             content=u_content,
             modtime=self.get_node_mtime(node),
             extension=node.extension
         )
         return indexed, indexed_w_content
     def index_changesets(self, writer, repo_name, repo, start_rev=None):
         """
         Add all changesets in the vcs repo starting at start_rev
         to the index writer

         :param writer: the whoosh index writer to add to
         :param repo_name: name of the repository from whence the
           changeset originates including the repository group
         :param repo: the vcs repository instance to index changesets for,
           the presumption is the repo has changesets to index
         :param start_rev: the full sha id to start indexing from;
           if start_rev is None then index from the first changeset in
           the repo
         :return: number of changesets added to the writer
         """
         if start_rev is None:
             start_rev = repo[0].raw_id

         log.debug('Indexing changesets in %s, starting at rev %s',
                   repo_name, start_rev)

         indexed = 0
         cs_iter = repo.get_changesets(start=start_rev)
         total = len(cs_iter)
         for indexed, cs in enumerate(cs_iter, 1):
             log.debug('    >> %s %s/%s', cs, indexed, total)
             writer.add_document(
                 raw_id=cs.raw_id,
                 owner=repo.contact,
                 date=cs._timestamp,
                 repository_rawname=repo_name,
                 repository=repo_name,
                 author=cs.author,
                 message=cs.message,
                 last=cs.last,
                 added=' '.join(node.path for node in cs.added).lower(),
                 removed=' '.join(node.path for node in cs.removed).lower(),
                 changed=' '.join(node.path for node in cs.changed).lower(),
                 # distinct name so the loop variable is not shadowed
                 parents=' '.join(parent.raw_id for parent in cs.parents),
             )
         return indexed
     def index_files(self, file_idx_writer, repo_name, repo):
         """
         Index files for given repo_name

         :param file_idx_writer: the whoosh index writer to add to
         :param repo_name: name of the repository we're indexing
         :param repo: instance of vcs repo
         :return: tuple ``(i_cnt, iwc_cnt)`` summing what add_doc reported:
             files indexed by name only, and files indexed with content
         """
         # resolve the revision once; the lookup queries the database
         index_rev = self._get_index_revision(repo)
         log.debug('Building file index for %s @revision:%s', repo_name,
                   index_rev)

         i_cnt = iwc_cnt = 0
         for idx_path in self.get_paths(repo):
             i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name, index_rev)
             i_cnt += i
             iwc_cnt += iwc

         log.debug('added %s files %s with content for repo %s',
                   i_cnt + iwc_cnt, iwc_cnt, repo.path)
         return i_cnt, iwc_cnt
     def update_changeset_index(self):
         """
         Incrementally update the changeset index for every repo in
         ``self.repo_paths``.

         For each non-empty repo the document flagged ``last`` is searched to
         find where the previous run stopped. If the repo has newer
         changesets (or nothing usable was found), the previous ``last``
         document(s) are deleted and changesets are re-added from that point.
         Changes are committed in ``finally``, so work done before an error
         is still committed; if nothing changed the writer is cancelled.
         """
         idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
         # the parser does not depend on the repo, so build it once
         qp = QueryParser('repository', schema=CHGSETS_SCHEMA)

         with idx.searcher() as searcher:
             writer = idx.writer()
             writer_is_dirty = False
             try:
                 indexed_total = 0
                 repo_name = None
                 for repo_name, repo in sorted(self.repo_paths.items()):
                     log.debug('Updating changeset index for repo %s', repo_name)
                     # skip indexing if there aren't any revs in the repo
                     num_of_revs = len(repo)
                     if num_of_revs < 1:
                         continue

                     q = qp.parse("last:t AND %s" % repo_name)
                     results = searcher.search(q)

                     # default to scanning the entire repo
                     last_rev = 0
                     start_id = None

                     if len(results) > 0:
                         # assuming that there is only one result, if not this
                         # may require a full re-index.
                         start_id = results[0]['raw_id']
                         try:
                             last_rev = repo.get_changeset(revision=start_id).revision
                         except ChangesetDoesNotExistError:
                             log.error('previous last revision %s not found - indexing from scratch', start_id)
                             start_id = None

                     # there are new changesets to index or a new repo to index
                     if last_rev == 0 or num_of_revs > last_rev + 1:
                         # delete the docs in the index for the previous
                         # last changeset(s)
                         for hit in results:
                             q = qp.parse("last:t AND %s AND raw_id:%s" %
                                             (repo_name, hit['raw_id']))
                             writer.delete_by_query(q)

                         # index from the previous last changeset + all new ones
                         indexed_total += self.index_changesets(writer,
                                                 repo_name, repo, start_id)
                         writer_is_dirty = True
                 # logged once after the loop: running total and last repo name
                 log.debug('indexed %s changesets for repo %s',
                           indexed_total, repo_name
                 )
             finally:
                 if writer_is_dirty:
                     log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
                     writer.commit(merge=True)
                     log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
                 else:
                     log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
                     # close the unused writer, as update_file_index does
                     writer.cancel()
     def update_file_index(self):
         """
         Incrementally update the file content index.

         First pass: walk the stored documents; for documents belonging to a
         repo in ``self.filtered_repo_update_paths``, drop those whose file
         no longer exists and mark those modified since indexing for
         re-indexing. Second pass: walk every repo in ``self.repo_paths`` and
         add each file that is new to the index or marked for re-indexing.
         The writer is committed in ``finally`` if anything changed and
         cancelled otherwise.
         """
         log.debug('STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
                   'AND REPOS %s', INDEX_EXTENSIONS, ' and '.join(self.repo_paths))

         idx = open_dir(self.index_location, indexname=self.indexname)
         # The set of all paths in the index
         indexed_paths = set()
         # The set of all paths we need to re-index
         to_index = set()

         writer = idx.writer()
         writer_is_dirty = False
         try:
             with idx.reader() as reader:

                 # Loop over the stored fields in the index
                 for fields in reader.all_stored_fields():
                     indexed_path = fields['path']
                     indexed_repo_path = fields['repository']
                     indexed_paths.add(indexed_path)

                     # only documents of repos selected for update are checked
                     if indexed_repo_path not in self.filtered_repo_update_paths:
                         continue

                     repo = self.repo_paths[indexed_repo_path]

                     try:
                         node = self.get_node(repo, indexed_path)
                         # Check if this file was changed since it was indexed
                         indexed_time = fields['modtime']
                         mtime = self.get_node_mtime(node)
                         if mtime > indexed_time:
                             # The file has changed, delete it and add it to
                             # the list of files to reindex
                             log.debug(
                                 'adding to reindex list %s mtime: %s vs %s',
                                     indexed_path, mtime, indexed_time
                             )
                             writer.delete_by_term('fileid', indexed_path)
                             writer_is_dirty = True

                             to_index.add(indexed_path)
                     except (ChangesetError, NodeDoesNotExistError):
                         # This file was deleted since it was indexed
                         log.debug('removing from index %s', indexed_path)
                         writer.delete_by_term('path', indexed_path)
                         writer_is_dirty = True

             # Loop over the files in the filesystem
             # Assume we have a function that gathers the filenames of the
             # documents to be indexed
             ri_cnt_total = 0  # indexed
             riwc_cnt_total = 0  # indexed with content
             for repo_name, repo in sorted(self.repo_paths.items()):
                 log.debug('Updating file index for repo %s', repo_name)
                 # skip indexing if there aren't any revisions
                 if len(repo) < 1:
                     continue
                 ri_cnt = 0   # indexed
                 riwc_cnt = 0  # indexed with content
                 for path in self.get_paths(repo):
                     if path in to_index or path not in indexed_paths:
                         # This is either a file that's changed, or a new file
                         # that wasn't indexed before. So index it!
                         i, iwc = self.add_doc(writer, path, repo, repo_name)
                         writer_is_dirty = True
                         ri_cnt += i
                         # counts every add_doc call, whatever it reported
                         ri_cnt_total += 1
                         riwc_cnt += iwc
                         riwc_cnt_total += iwc
                 log.debug('added %s files %s with content for repo %s',
                              ri_cnt + riwc_cnt, riwc_cnt, repo.path
                 )
             log.debug('indexed %s files in total and %s with content',
                         ri_cnt_total, riwc_cnt_total
             )
         finally:
             if writer_is_dirty:
                 log.debug('>> COMMITING CHANGES TO FILE INDEX <<')
                 writer.commit(merge=True)
                 log.debug('>>> FINISHED REBUILDING FILE INDEX <<<')
             else:
                 log.debug('>> NOTHING TO COMMIT TO FILE INDEX <<')
                 writer.cancel()
     def build_indexes(self):
         if os.path.exists(self.index_location):
             log.debug('removing previous index')
             rmtree(self.index_location)
         if not os.path.exists(self.index_location):
             os.mkdir(self.index_location)

kallithea/lib/utils.py

➞

Show inline comments

 # -*- coding: utf-8 -*-
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 kallithea.lib.utils
 ~~~~~~~~~~~~~~~~~~~
 Utilities library for Kallithea
 This file was forked by the Kallithea project in July 2014.
 Original author and date, and relevant copyright and licensing information is below:
 :created_on: Apr 18, 2010
 :author: marcink
 :copyright: (c) 2013 RhodeCode GmbH, and others.
 :license: GPLv3, see LICENSE.md for more details.
 """
 import datetime
 import logging
 import os
 import re
 import sys
 import traceback
 import urllib.error
 from distutils.version import StrictVersion
 import mercurial.config
 import mercurial.error
 import mercurial.ui
-import kallithea.config.conf
+import kallithea.lib.conf
 from kallithea.lib.exceptions import InvalidCloneUriException
 from kallithea.lib.utils2 import ascii_bytes, aslist, extract_mentioned_usernames, get_current_authuser, safe_bytes, safe_str
 from kallithea.lib.vcs.backends.git.repository import GitRepository
 from kallithea.lib.vcs.backends.hg.repository import MercurialRepository
 from kallithea.lib.vcs.conf import settings
 from kallithea.lib.vcs.exceptions import RepositoryError, VCSError
 from kallithea.lib.vcs.utils.fakemod import create_module
 from kallithea.lib.vcs.utils.helpers import get_scm
 from kallithea.model import meta
 from kallithea.model.db import RepoGroup, Repository, Setting, Ui, User, UserGroup, UserLog
 log = logging.getLogger(__name__)
 # Names starting with 'rm__', then 8 digits, '_', 6 digits, '_', 6 digits
 # and '_'. Directories matching this are skipped as removed repos when
 # scanning the filesystem and when mapping repo groups.
 REMOVED_REPO_PAT = re.compile(r'rm__\d{8}_\d{6}_\d{6}_.*')
 #==============================================================================
 # PERM DECORATOR HELPERS FOR EXTRACTING NAMES FOR PERM CHECKS
 #==============================================================================
 def get_repo_slug(request):
     """
     Return the 'repo_name' routing value of the request with trailing
     slashes stripped; a missing or empty value is returned as it is.
     """
     repo_name = request.environ['pylons.routes_dict'].get('repo_name')
     return repo_name.rstrip('/') if repo_name else repo_name
 def get_repo_group_slug(request):
     """
     Return the 'group_name' routing value of the request with trailing
     slashes stripped; a missing or empty value is returned as it is.
     """
     group_name = request.environ['pylons.routes_dict'].get('group_name')
     return group_name.rstrip('/') if group_name else group_name
 def get_user_group_slug(request):
     """
     Look up the user group whose id is the 'id' routing value of the
     request and return its name, or None when no such group is found.
     """
     group_id = request.environ['pylons.routes_dict'].get('id')
     user_group = UserGroup.get(group_id)
     return user_group.users_group_name if user_group else None
 def _get_permanent_id(s):
     """Decode the repo ID from a stable URL element: '_123' gives 123.

     Returns None for any string that is not an underscore followed by digits.
     """
     matched = re.match(r'^_(\d+)$', s)
     return None if matched is None else int(matched.group(1))
 def fix_repo_id_name(path):
     """
     Rewrite repo_name for _<ID> permanent URLs.

     If the first element of ``path`` looks like _<ID> and a repository with
     that ID exists, return the path with that element replaced by the full
     repo name. In every other case return ``path`` unchanged.
     """
     # 'rest' keeps its leading slash, or is empty when there is no slash
     first, slash, remainder = path.partition('/')
     rest = slash + remainder
     repo_id = _get_permanent_id(first)
     if repo_id is None:
         return path
     repo = Repository.get(repo_id)
     if repo is None:
         return path
     return repo.repo_name + rest
 def action_logger(user, action, repo, ipaddr='', commit=False):
     """
     Record an action made by a user as a UserLog entry.

     :param user: user that made this action: an object with a user_id
         attribute, or a unique username string
     :param action: action to log, should be one of the predefined unique
         actions for easy translations
     :param repo: object with a repo_id attribute, or the string name of the
         repository the action was made on; anything else is logged with no
         repository and an empty repository name
     :param ipaddr: optional IP address the action was made from; when empty,
         the ``ip_addr`` of the current auth user is used if it has one
     :param commit: commit the database session after adding the entry
     :raises Exception: if ``user`` is neither of the accepted kinds
     """
     # without an explicit IP address, try the currently authenticated user
     ipaddr = ipaddr or getattr(get_current_authuser(), 'ip_addr', '')

     if getattr(user, 'user_id', None):
         user_obj = User.get(user.user_id)
     elif isinstance(user, str):
         user_obj = User.get_by_username(user)
     else:
         raise Exception('You have to provide a user object or a username')

     repo_obj, repo_name = None, ''
     if getattr(repo, 'repo_id', None):
         repo_obj = Repository.get(repo.repo_id)
         repo_name = repo_obj.repo_name
     elif isinstance(repo, str):
         repo_name = repo.lstrip('/')
         repo_obj = Repository.get_by_repo_name(repo_name)

     entry = UserLog()
     entry.user_id = user_obj.user_id
     entry.username = user_obj.username
     entry.action = action
     entry.repository = repo_obj
     entry.repository_name = repo_name
     entry.action_date = datetime.datetime.now()
     entry.user_ip = ipaddr

     session = meta.Session()
     session.add(entry)

     log.info('Logging action:%s on %s by user:%s ip:%s',
              action, repo, user_obj, ipaddr)
     if commit:
         meta.Session().commit()
 def get_filesystem_repos(path):
     """
     Scans given path for repos and yields (name,(type,path)) tuples, where
     name is the repo location relative to ``path``.

     Directories recognized as repos are not descended into; all other
     directories are searched recursively. Removed repos, dot-directories and
     non-bare Git repos are skipped entirely.

     :param path: path to scan for repositories
     """

     # remove ending slash for better results
     path = path.rstrip(os.sep)
     log.debug('now scanning in %s', path)

     def isdir(*n):
         # True if the joined path components name an existing directory
         return os.path.isdir(os.path.join(*n))

     for root, dirs, _files in os.walk(path):
         # subdirectories os.walk should still descend into
         recurse_dirs = []
         for subdir in dirs:
             # skip removed repos
             if REMOVED_REPO_PAT.match(subdir):
                 continue

             # skip .<something> dirs TODO: rly? then we should prevent creating them ...
             if subdir.startswith('.'):
                 continue

             cur_path = os.path.join(root, subdir)
             if isdir(cur_path, '.git'):
                 log.warning('ignoring non-bare Git repo: %s', cur_path)
                 continue

             # looks like a repo: has .hg or .svn, or the layout of a bare git
             # repo ('objects' plus either 'refs' or a 'packed-refs' file)
             if (isdir(cur_path, '.hg') or
                 isdir(cur_path, '.svn') or
                 isdir(cur_path, 'objects') and (isdir(cur_path, 'refs') or
                                                 os.path.isfile(os.path.join(cur_path, 'packed-refs')))):

                 if not os.access(cur_path, os.R_OK) or not os.access(cur_path, os.X_OK):
                     log.warning('ignoring repo path without access: %s', cur_path)
                     continue

                 # read-only repos are reported but still yielded
                 if not os.access(cur_path, os.W_OK):
                     log.warning('repo path without write access: %s', cur_path)

                 try:
                     scm_info = get_scm(cur_path)
                     assert cur_path.startswith(path)
                     # strip the scan root and the separator following it
                     repo_path = cur_path[len(path) + 1:]
                     yield repo_path, scm_info
                     continue # no recursion
                 except VCSError:
                     # We should perhaps ignore such broken repos, but especially
                     # the bare git detection is unreliable so we dive into it
                     pass

             recurse_dirs.append(subdir)

         # in-place update so os.walk only descends into the kept directories
         dirs[:] = recurse_dirs
 def is_valid_repo_uri(repo_type, url, ui):
     """Check if the url seems like a valid remote repo location

     Returns None when the url passes; a ``repo_type`` other than 'hg' or
     'git' is not checked at all.

     :param repo_type: 'hg' or 'git'
     :param url: remote location to check
     :param ui: passed on to the Mercurial URL check
     :raises InvalidCloneUriException: if any problems
     """
     if repo_type == 'hg':
         if url.startswith(('http', 'ssh')):
             # initially check if it's at least the proper URL
             # or does it pass basic auth
             try:
                 MercurialRepository._check_url(url, ui)
             except urllib.error.URLError as e:
                 raise InvalidCloneUriException('URI %s URLError: %s' % (url, e))
             except mercurial.error.RepoError as e:
                 raise InvalidCloneUriException('Mercurial %s: %s' % (type(e).__name__, safe_str(bytes(e))))
         elif url.startswith('git+http'):
             raise InvalidCloneUriException('URI type %s not implemented' % (url,))
         else:
             raise InvalidCloneUriException('URI %s not allowed' % (url,))

     elif repo_type == 'git':
         if url.startswith(('http', 'git')):
             # initially check if it's at least the proper URL
             # or does it pass basic auth
             try:
                 GitRepository._check_url(url)
             except urllib.error.URLError as e:
                 raise InvalidCloneUriException('URI %s URLError: %s' % (url, e))
         elif url.startswith('hg+http'):
             raise InvalidCloneUriException('URI type %s not implemented' % (url,))
         else:
             # one-tuple like the other branches, so formatting never
             # depends on the type of url
             raise InvalidCloneUriException('URI %s not allowed' % (url,))
 def is_valid_repo(repo_name, base_path, scm=None):
     """
     Tell whether ``base_path``/``repo_name`` is a valid repository.

     :param repo_name: repository name, joined onto base_path
     :param base_path: directory the repository name is relative to
     :param scm: optional expected scm type; when given, the detected type
         must also be equal to it
     :return: True if the path is a valid repository (of the expected type),
         False otherwise
     """
     # TODO: paranoid security checks?
     full_path = os.path.join(base_path, repo_name)

     try:
         detected = get_scm(full_path)
     except VCSError:
         return False

     if scm:
         return detected[0] == scm
     return True
 def is_valid_repo_group(repo_group_name, base_path, skip_path_check=False):
     """
     Tell whether ``base_path``/``repo_group_name`` is a repository group.

     It is not a group if it is a repository itself or if its parent
     directory is one.

     :param repo_group_name: group name, joined onto base_path
     :param base_path: directory the group name is relative to
     :param skip_path_check: when True, do not require the path to be an
         existing directory
     """
     full_path = os.path.join(base_path, repo_group_name)

     # a repo is not a group
     if is_valid_repo(repo_group_name, base_path):
         return False

     # we need to check bare git repos at higher level
     # since we might match branches/hooks/info/objects or possible
     # other things inside bare git repo
     try:
         get_scm(os.path.dirname(full_path))
     except VCSError:
         pass
     else:
         return False

     # finally the path itself has to be valid, unless that check is skipped
     return bool(skip_path_check or os.path.isdir(full_path))
 def make_ui(repo_path=None):
     """
     Create an Mercurial 'ui' object based on database Ui settings, possibly
     augmenting with content from a hgrc file.

     :param repo_path: optional config file path passed to ``readconfig``
         after the database and hardcoded settings are applied
     :return: the configured ui object
     """
     baseui = mercurial.ui.ui()

     # clean the baseui object
     baseui._ocfg = mercurial.config.config()
     baseui._ucfg = mercurial.config.config()
     baseui._tcfg = mercurial.config.config()

     sa = meta.Session()
     # apply all active Ui rows, ordered by section and key
     for ui_ in sa.query(Ui).order_by(Ui.ui_section, Ui.ui_key):
         if ui_.ui_active:
             log.debug('config from db: [%s] %s=%r', ui_.ui_section,
                       ui_.ui_key, ui_.ui_value)
             # a NULL value in the database is set as an empty bytes value
             baseui.setconfig(ascii_bytes(ui_.ui_section), ascii_bytes(ui_.ui_key),
                              b'' if ui_.ui_value is None else safe_bytes(ui_.ui_value))

     # force set push_ssl requirement to False, Kallithea handles that
     # NOTE(review): the value is a bool here while other values are bytes - confirm
     baseui.setconfig(b'web', b'push_ssl', False)
     baseui.setconfig(b'web', b'allow_push', b'*')
     # prevent interactive questions for ssh password / passphrase
     ssh = baseui.config(b'ui', b'ssh', default=b'ssh')
     baseui.setconfig(b'ui', b'ssh', b'%s -oBatchMode=yes -oIdentitiesOnly=yes' % ssh)
     # push / pull hooks
     baseui.setconfig(b'hooks', b'changegroup.kallithea_log_push_action', b'python:kallithea.lib.hooks.log_push_action')
     baseui.setconfig(b'hooks', b'outgoing.kallithea_log_pull_action', b'python:kallithea.lib.hooks.log_pull_action')

     if repo_path is not None:
         # Note: MercurialRepository / mercurial.localrepo.instance will do this too, so it will always be possible to override db settings or what is hardcoded above
         baseui.readconfig(repo_path)

     assert baseui.plain()  # set by hgcompat.monkey_do (invoked from import of vcs.backends.hg) to minimize potential impact of loading config files
     return baseui
 def set_app_settings(config):
     """
     Copy all application settings from the database into ``config`` and set
     its 'base_path' to the repositories location.

     :param config: mapping to update in place
     """
     for key, value in Setting.get_app_settings().items():
         config[key] = value
     config['base_path'] = Ui.get_repos_location()
 def set_vcs_config(config):
     """
     Patch VCS config with some Kallithea specific stuff

     Sets the backend classes, the git executable path ('git_path', default
     'git'), the git revision filter ('git_rev_filter', default '--all') and
     the default encodings ('default_encoding', comma separated, default
     'utf-8') on the vcs settings module.

     :param config: kallithea.CONFIG
     """
     settings.BACKENDS = {
         'hg': 'kallithea.lib.vcs.backends.hg.MercurialRepository',
         'git': 'kallithea.lib.vcs.backends.git.GitRepository',
     }

     settings.GIT_EXECUTABLE_PATH = config.get('git_path', 'git')
     settings.GIT_REV_FILTER = config.get('git_rev_filter', '--all').strip()
     settings.DEFAULT_ENCODINGS = aslist(config.get('default_encoding',
                                                         'utf-8'), sep=',')
 def set_indexer_config(config):
     """
     Update Whoosh index mapping

     Extends INDEX_EXTENSIONS and INDEX_FILENAMES in place with the
     whitespace separated values of 'index.extensions' and 'index.filenames'.

     :param config: kallithea.CONFIG
     """
     # NOTE(review): re.split on a missing/empty setting gives [''], so an
     # empty string is appended to the list - confirm that is intended
     log.debug('adding extra into INDEX_EXTENSIONS')
-    kallithea.config.conf.INDEX_EXTENSIONS.extend(re.split(r'\s+', config.get('index.extensions', '')))
+    kallithea.lib.conf.INDEX_EXTENSIONS.extend(re.split(r'\s+', config.get('index.extensions', '')))
     log.debug('adding extra into INDEX_FILENAMES')
-    kallithea.config.conf.INDEX_FILENAMES.extend(re.split(r'\s+', config.get('index.filenames', '')))
+    kallithea.lib.conf.INDEX_FILENAMES.extend(re.split(r'\s+', config.get('index.filenames', '')))
 def map_groups(path):
     """
     Given a full path to a repository, create all nested groups that this
     repo is inside. This function creates parent-child relationships between
     groups and creates default perms for all new groups.

     :param path: full path to repository
     :return: the innermost group looked up or created, or None when the
         repo is not inside any group
     """
     from kallithea.model.repo_group import RepoGroupModel
     sa = meta.Session()
     # last element is repo in nested groups structure
     group_parts = path.split(kallithea.URL_SEP)[:-1]
     rgm = RepoGroupModel()
     owner = User.get_first_admin()

     parent = None
     group = None
     for lvl, _part in enumerate(group_parts):
         # full name of the group at this nesting level
         group_name = '/'.join(group_parts[:lvl + 1])
         group = RepoGroup.get_by_group_name(group_name)

         # skip folders that are now removed repos
         if REMOVED_REPO_PAT.match(group_name):
             break

         if group is None:
             log.debug('creating group level: %s group_name: %s',
                       lvl, group_name)
             group = RepoGroup(group_name, parent)
             group.group_description = '%s group' % group_name
             group.owner = owner
             sa.add(group)
             rgm._create_default_perms(group)
             sa.flush()

         parent = group
     return group
 def repo2db_mapper(initial_repo_dict, remove_obsolete=False,
                    install_git_hooks=False, user=None, overwrite_git_hooks=False):
     """
     maps all repos given in initial_repo_dict, non existing repositories
     are created, if remove_obsolete is True it also check for db entries
     that are not in initial_repo_dict and removes them.

     :param initial_repo_dict: mapping with repositories found by scanning methods
     :param remove_obsolete: check for obsolete entries in database
     :param install_git_hooks: if this is True, also check and install git hook
         for a repo if missing
     :param user: owner of newly created repositories; defaults to the first
         admin user
     :param overwrite_git_hooks: if this is True, overwrite any existing git hooks
         that may be encountered (even if user-deployed)
     :return: tuple ``(added, removed)`` of repo name lists. ``removed`` lists
         every database repo missing from initial_repo_dict, whether or not it
         was actually deleted.
     """
     from kallithea.model.repo import RepoModel
     from kallithea.model.scm import ScmModel
     sa = meta.Session()
     repo_model = RepoModel()
     if user is None:
         user = User.get_first_admin()
     added = []

     # creation defaults
     defs = Setting.get_default_repo_settings(strip_prefix=True)
     enable_statistics = defs.get('repo_enable_statistics')
     enable_downloads = defs.get('repo_enable_downloads')
     private = defs.get('repo_private')

     for name, repo in sorted(initial_repo_dict.items()):
         group = map_groups(name)
         db_repo = repo_model.get_by_repo_name(name)
         # found repo that is on filesystem not in Kallithea database
         if not db_repo:
             log.info('repository %s not found, creating now', name)
             added.append(name)
             desc = (repo.description
                     if repo.description != 'unknown'
                     else '%s repository' % name)

             new_repo = repo_model._create_repo(
                 repo_name=name,
                 repo_type=repo.alias,
                 description=desc,
                 repo_group=getattr(group, 'group_id', None),
                 owner=user,
                 enable_downloads=enable_downloads,
                 enable_statistics=enable_statistics,
                 private=private,
                 state=Repository.STATE_CREATED
             )
             sa.commit()
             # we added that repo just now, and make sure it has githook
             # installed, and updated server info
             if new_repo.repo_type == 'git':
                 git_repo = new_repo.scm_instance
                 ScmModel().install_git_hooks(git_repo)
                 # update repository server-info
                 log.debug('Running update server info')
                 git_repo._update_server_info()
             new_repo.update_changeset_cache()
         elif install_git_hooks:
             if db_repo.repo_type == 'git':
                 ScmModel().install_git_hooks(db_repo.scm_instance, force=overwrite_git_hooks)

     removed = []
     # remove from database those repositories that are not in the filesystem
     for repo in sa.query(Repository).all():
         if repo.repo_name not in initial_repo_dict:
             if remove_obsolete:
                 log.debug("Removing non-existing repository found in db `%s`",
                           repo.repo_name)
                 try:
                     RepoModel().delete(repo, forks='detach', fs_remove=False)
                     sa.commit()
                 except Exception:
                     #don't hold further removals on error
                     log.error(traceback.format_exc())
                     sa.rollback()
             removed.append(repo.repo_name)
     return added, removed
 def load_extensions(root_path):
     """
     Load the optional Kallithea extensions module found below `root_path`.

     `extensions.py` is tried first; if that file does not exist, the
     deprecated `rcextensions/__init__.py` is tried and a deprecation warning
     is logged. If neither file exists, the function returns without changing
     anything.

     When a module was loaded, it is stored as `kallithea.EXTENSIONS` and its
     optional `EXTRA_MAPPINGS`, `INDEX_EXTENSIONS` and `EXTRA_INDEX_EXTENSIONS`
     attributes are applied to the shared conf module.
     """
     # NOTE(review): create_module is defined elsewhere; presumably it raises
     # FileNotFoundError when the given path does not exist - confirm.
     try:
         ext = create_module('extensions', os.path.join(root_path, 'extensions.py'))
     except FileNotFoundError:
         try:
             ext = create_module('rc', os.path.join(root_path, 'rcextensions', '__init__.py'))
             log.warning('The name "rcextensions" is deprecated. Please use a file `extensions.py` instead of a directory `rcextensions`.')
         except FileNotFoundError:
             # no extensions of either kind - nothing to load
             return
     log.info('Loaded Kallithea extensions from %s', ext)
     kallithea.EXTENSIONS = ext
     # Additional mappings that are not present in the pygments lexers
-    kallithea.config.conf.LANGUAGES_EXTENSIONS_MAP.update(getattr(ext, 'EXTRA_MAPPINGS', {}))
+    kallithea.lib.conf.LANGUAGES_EXTENSIONS_MAP.update(getattr(ext, 'EXTRA_MAPPINGS', {}))
     # Override any INDEX_EXTENSIONS
     # (only when the extension module defines a non-empty INDEX_EXTENSIONS)
     if getattr(ext, 'INDEX_EXTENSIONS', []):
         log.debug('settings custom INDEX_EXTENSIONS')
-        kallithea.config.conf.INDEX_EXTENSIONS = getattr(ext, 'INDEX_EXTENSIONS', [])
+        kallithea.lib.conf.INDEX_EXTENSIONS = getattr(ext, 'INDEX_EXTENSIONS', [])
     # Additional INDEX_EXTENSIONS
     # (appended to whichever list is in effect after the optional override)
     log.debug('adding extra into INDEX_EXTENSIONS')
-    kallithea.config.conf.INDEX_EXTENSIONS.extend(getattr(ext, 'EXTRA_INDEX_EXTENSIONS', []))
+    kallithea.lib.conf.INDEX_EXTENSIONS.extend(getattr(ext, 'EXTRA_INDEX_EXTENSIONS', []))
 #==============================================================================
 # MISC
 #==============================================================================
 # Oldest git version accepted by check_git_version(); an older git makes
 # that function log an error and terminate the process.
 git_req_ver = StrictVersion('1.7.4')
 def check_git_version():
     """
     Checks what version of git is installed on the system, and raise a system exit
     if it's too old for Kallithea to work properly.

     Returns None when the git backend is not enabled, when no git executable
     is configured, or when running "git --version" fails or gives no output.
     Otherwise returns the detected version as a StrictVersion;
     StrictVersion('0.0.0') is returned when no "N.N.N" version number could
     be found in the output.

     Calls sys.exit(1) when the detected version is older than git_req_ver.
     """
     if 'git' not in kallithea.BACKENDS:
         return None
     if not settings.GIT_EXECUTABLE_PATH:
         log.warning('No git executable configured - check "git_path" in the ini file.')
         return None
     try:
         stdout, stderr = GitRepository._run_git_command(['--version'])
     except RepositoryError:
         # message will already have been logged as error
         log.warning('No working git executable found - check "git_path" in the ini file.')
         return None
     if stderr:
         log.warning('Error/stderr from "%s --version":\n%s', settings.GIT_EXECUTABLE_PATH, safe_str(stderr))
     if not stdout:
         log.warning('No working git executable found - check "git_path" in the ini file.')
         return None
     output = safe_str(stdout).strip()
     # The dots must be escaped: an unescaped "." matches any character, so
     # strings like "12345" or "2-30-1" would match and then make
     # StrictVersion() raise ValueError instead of reaching the fallback below.
     m = re.search(r"\d+\.\d+\.\d+", output)
     if m:
         ver = StrictVersion(m.group(0))
         log.debug('Git executable: "%s", version %s (parsed from: "%s")',
                   settings.GIT_EXECUTABLE_PATH, ver, output)
         if ver < git_req_ver:
             log.error('Kallithea detected %s version %s, which is too old '
                       'for the system to function properly. '
                       'Please upgrade to version %s or later. '
                       'If you strictly need Mercurial repositories, you can '
                       'clear the "git_path" setting in the ini file.',
                       settings.GIT_EXECUTABLE_PATH, ver, git_req_ver)
             log.error("Terminating ...")
             sys.exit(1)
     else:
         # no recognizable version number - report it, but keep running
         ver = StrictVersion('0.0.0')
         log.warning('Error finding version number in "%s --version" stdout:\n%s',
                     settings.GIT_EXECUTABLE_PATH, output)
     return ver
 def extract_mentioned_users(text):
     """
     Return the set of database Users that are @mentioned in `text`.

     Usernames are looked up case-insensitively; names without a matching
     User, and the default user, are left out.
     """
     candidates = (
         User.get_by_username(username, case_insensitive=True)
         for username in extract_mentioned_usernames(text)
     )
     return {
         candidate
         for candidate in candidates
         if candidate is not None and not candidate.is_default_user
     }

kallithea/tests/functional/test_search_indexing.py

➞

Show inline comments

 import mock
 import kallithea
-from kallithea.config.conf import INDEX_FILENAMES
+from kallithea.lib.conf import INDEX_FILENAMES
 from kallithea.model.meta import Session
 from kallithea.model.repo import RepoModel
 from kallithea.model.repo_group import RepoGroupModel
 from kallithea.tests import base
 from kallithea.tests.fixture import Fixture, create_test_index
 # Shared Fixture instance used by the repo init functions and by the
 # setup code of the test class in this module.
 fixture = Fixture()
 def init_indexing_test(repo):
     """
     Commit one new file to the hg repository `repo`.

     File name, content and commit message are all chosen to be unique, so
     each of them can be searched for individually.
     """
     commit_args = {
         'filename': 'this_should_be_unique_filename.txt',
         'content': 'this_should_be_unique_content\n',
         'message': 'this_should_be_unique_commit_log',
         'vcs_type': 'hg',
         'newfile': True,
     }
     fixture.commit_change(repo.repo_name, **commit_args)
 def init_stopword_test(repo):
     """
     Create two commits in the hg repository `repo` for the stopword tests.

     The first commit adds the file 'this/is/it'; the second one is made on
     top of it, adds 'join.us' at the top level and uses an explicit author.
     """
     in_folder_commit = fixture.commit_change(
         repo.repo_name,
         filename='this/is/it',
         content='def test\n',
         message='bother to ask where - in folder',
         vcs_type='hg',
         newfile=True,
     )
     fixture.commit_change(
         repo.repo_name,
         filename='join.us',
         content='def test\n',
         message='bother to ask where - top level',
         author='this is it <this-is-it@foo.bar.com>',
         vcs_type='hg',
         parent=in_folder_commit,
         newfile=True,
     )
 # Repositories created by TestSearchControllerIndexing.setup_class.
 # The second element of each entry is either a callable, which is invoked
 # on the newly created repository, or the name of the repository to fork.
 repos = [
     # reponame,              init func or fork base, groupname
     ('indexing_test',       init_indexing_test,     None),
     ('indexing_test-fork',  'indexing_test',       None),
     ('group/indexing_test', 'indexing_test',       'group'),
     ('this-is-it',          'indexing_test',       None),
     ('indexing_test-foo',   'indexing_test',       None),
     ('stopword_test',       init_stopword_test,     None),
+]
 # map: name => id
 # (filled by setup_class: repoids maps repo name to repo_id, groupids maps
 # group name to group_id)
 repoids = {}
 groupids = {}
 def rebuild_index(full_index):
     """
     Rebuild the test search index with debug logging of the indexer muted.

     `full_index` is passed on unchanged to create_test_index().

     Why "log.debug()" of "indexers.daemon" is disabled regardless of
     logging level: the more revisions the managed repositories have, the
     more memory capturing that debug output requires. This may cause
     unintentional failure of subsequent tests, if ENOMEM at forking "git"
     prevents rebuilding the index for search.
     (FYI, ENOMEM occurs at forking "git" with python 2.7.3,
     Linux 3.2.78-1 x86_64, 3GB memory, and no ulimit
     configuration for memory)
     """
     def _discard(*args, **kwargs):
         return None
     muted_debug = mock.patch('kallithea.lib.indexers.daemon.log.debug', _discard)
     with muted_debug:
         create_test_index(base.TESTS_TMP_PATH, kallithea.CONFIG, full_index=full_index)
 class TestSearchControllerIndexing(base.TestController):
     """
     Search tests that run against the repositories listed in `repos`.

     The repositories (and their group) are created and indexed once in
     setup_class and deleted again in teardown_class.
     """
     @classmethod
     def setup_class(cls):
         """Create the repos and groups from `repos`, then index them."""
         for reponame, init_or_fork, groupname in repos:
             # create each repo group only once
             if groupname and groupname not in groupids:
                 group = fixture.create_repo_group(groupname)
                 groupids[groupname] = group.group_id
             if callable(init_or_fork):
                 # new repository, populated by its init function
                 repo = fixture.create_repo(reponame,
                                            repo_group=groupname)
                 init_or_fork(repo)
             else:
                 # fork of the repository named by init_or_fork
                 repo = fixture.create_fork(init_or_fork, reponame,
                                            repo_group=groupname)
             repoids[reponame] = repo.repo_id
         # treat "it" as indexable filename
         filenames_mock = list(INDEX_FILENAMES)
         filenames_mock.append('it')
         with mock.patch('kallithea.lib.indexers.daemon.INDEX_FILENAMES',
                         filenames_mock):
             rebuild_index(full_index=False) # only for newly added repos
     @classmethod
     def teardown_class(cls):
         """Delete the repos and groups created by setup_class and re-index."""
         # delete in reversed order, to delete fork destination at first
         for reponame, init_or_fork, groupname in reversed(repos):
             RepoModel().delete(repoids[reponame])
         for reponame, init_or_fork, groupname in reversed(repos):
             if groupname in groupids:
                 # pop() so that a group shared by several repos is deleted once
                 RepoGroupModel().delete(groupids.pop(groupname),
                                         force_delete=True)
         Session().commit()
         Session.remove()
         rebuild_index(full_index=True) # rebuild fully for subsequent tests
     @base.parametrize('reponame', [
         ('indexing_test'),
         ('indexing_test-fork'),
         ('group/indexing_test'),
         ('this-is-it'),
         ('*-fork'),
         ('group/*'),
     ])
     @base.parametrize('searchtype,query,hit', [
         ('content', 'this_should_be_unique_content', 1),
         ('commit', 'this_should_be_unique_commit_log', 1),
         ('path', 'this_should_be_unique_filename.txt', 1),
     ])
     def test_repository_tokenization(self, reponame, searchtype, query, hit):
         """Global search with a "repository:<reponame>" term gives `hit` results."""
         self.log_user()
         q = 'repository:%s %s' % (reponame, query)
         response = self.app.get(base.url(controller='search', action='index'),
                                 {'q': q, 'type': searchtype})
         response.mustcontain('>%d results' % hit)
     @base.parametrize('reponame', [
         ('indexing_test'),
         ('indexing_test-fork'),
         ('group/indexing_test'),
         ('this-is-it'),
     ])
     @base.parametrize('searchtype,query,hit', [
         ('content', 'this_should_be_unique_content', 1),
         ('commit', 'this_should_be_unique_commit_log', 1),
         ('path', 'this_should_be_unique_filename.txt', 1),
     ])
     def test_searching_under_repository(self, reponame, searchtype, query, hit):
         """Search via the URL of repository `reponame` gives `hit` results."""
         self.log_user()
         response = self.app.get(base.url(controller='search', action='index',
                                     repo_name=reponame),
                                 {'q': query, 'type': searchtype})
         response.mustcontain('>%d results' % hit)
     @base.parametrize('searchtype,query,hit', [
         ('content', 'path:this/is/it def test', 1),
         ('commit', 'added:this/is/it bother to ask where', 1),
         # this condition matches against files below, because
         # "path:" condition is also applied on "repository path".
         # - "this/is/it" in "stopword_test" repo
         # - "this_should_be_unique_filename.txt" in "this-is-it" repo
         ('path', 'this/is/it', 2),
         ('content', 'extension:us', 1),
         ('path', 'extension:us', 1),
     ])
     def test_filename_stopword(self, searchtype, query, hit):
         """Global search for the file names 'this/is/it' and '*.us' gives `hit` results."""
         # NOTE(review): unlike the tests above, this one does not call
         # self.log_user() before searching - confirm that is intended.
         response = self.app.get(base.url(controller='search', action='index'),
                                 {'q': query, 'type': searchtype})
         response.mustcontain('>%d results' % hit)
     @base.parametrize('searchtype,query,hit', [
         # NOTE(review): the "owner:" cases below all expect 0 results although
         # their comments speak of matches - confirm the comments are current.
         # matching against both 2 files
         ('content', 'owner:"this is it"', 0),
         ('content', 'owner:this-is-it', 0),
         ('path', 'owner:"this is it"', 0),
         ('path', 'owner:this-is-it', 0),
         # matching against both 2 revisions
         ('commit', 'owner:"this is it"', 0),
         ('commit', 'owner:"this-is-it"', 0),
         # matching against only 1 revision
         ('commit', 'author:"this is it"', 1),
         ('commit', 'author:"this-is-it"', 1),
     ])
     def test_mailaddr_stopword(self, searchtype, query, hit):
         """Global search with "owner:" / "author:" terms gives `hit` results."""
         response = self.app.get(base.url(controller='search', action='index'),
                                 {'q': query, 'type': searchtype})
         response.mustcontain('>%d results' % hit)

0 comments (0 inline, 0 general)