# -*- coding: utf-8 -*-
"""
rhodecode.lib.celerylib.__init__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
celery libs for RhodeCode
:created_on: Nov 27, 2010
:author: marcink
:copyright: (C) 2009-2011 Marcin Kuzminski <marcin@python-works.com>
    :license: GPLv3, see COPYING for more details.
"""
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import socket
import traceback
import logging
from os.path import dirname as dn, join as jn
from hashlib import md5
from decorator import decorator
from pylons import config
from vcs.utils.lazy import LazyProperty
from rhodecode.lib import str2bool
from rhodecode.lib.pidlock import DaemonLock, LockHeld
from celery.messaging import establish_connection
log = logging.getLogger(__name__)
try:
    CELERY_ON = str2bool(config['app_conf'].get('use_celery'))
except KeyError:
    CELERY_ON = False
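
# ResultWrapper gives synchronously-computed results the same ``.result``
# attribute that celery's AsyncResult exposes, so callers of run_task() do not
# have to care which execution path was taken.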
class ResultWrapper(object):
    def __init__(self, task):
        self.task = task

    @LazyProperty
    def result(self):
        return self.task
def run_task(task, *args, **kwargs):
    if CELERY_ON:
        try:
            t = task.apply_async(args=args, kwargs=kwargs)
            log.info('running task %s:%s', t.task_id, task)
            return t
        except socket.error, e:
            if isinstance(e, IOError) and e.errno == 111:
                log.debug('Unable to connect to celeryd. Sync execution')
            else:
                log.error(traceback.format_exc())
        except KeyError, e:
            log.debug('Unable to connect to celeryd. Sync execution')
        except Exception, e:
            log.error(traceback.format_exc())

    log.debug('executing task %s in sync mode', task)
    return ResultWrapper(task(*args, **kwargs))
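
# Illustrative usage sketch (``send_email`` stands for any celery @task, e.g.
# one of the tasks defined in rhodecode.lib.celerylib.tasks; the arguments are
# examples only):
#
#   from rhodecode.lib.celerylib.tasks import send_email
#   result = run_task(send_email, ['admin@example.com'], 'subject', 'body text')
#   answer = result.result  # from AsyncResult when celery is on, plain value otherwise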
def __get_lockkey(func, *fargs, **fkwargs):
    params = list(fargs)
    params.extend(['%s-%s' % ar for ar in fkwargs.items()])

    func_name = str(func.__name__) if hasattr(func, '__name__') else str(func)

    lockkey = 'task_%s.lock' % \
        md5(func_name + '-' + '-'.join(map(str, params))).hexdigest()
    return lockkey
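
# e.g. __get_lockkey(whoosh_index, '/srv/repos', True) yields
# 'task_<md5("whoosh_index-/srv/repos-True")>.lock' (the path is only an example).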
def locked_task(func):
    def __wrapper(func, *fargs, **fkwargs):
        lockkey = __get_lockkey(func, *fargs, **fkwargs)
        lockkey_path = config['here']

        log.info('running task with lockkey %s', lockkey)
        try:
            l = DaemonLock(file_=jn(lockkey_path, lockkey))
            ret = func(*fargs, **fkwargs)
            l.release()
            return ret
        except LockHeld:
            log.info('LockHeld')
            return 'Task with key %s already running' % lockkey

    return decorator(__wrapper, func)
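
# locked_task is meant to be stacked under celery's @task decorator, as done
# for whoosh_index below, so that only one instance of a given task (per
# argument set) runs at a time.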
"""
    rhodecode.lib.celerylib.tasks
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    RhodeCode task modules, containing all tasks that are supposed to be run
    by the celery daemon
:created_on: Oct 6, 2010
from celery.decorators import task
from time import mktime
from operator import itemgetter
from string import lower
from pylons import config, url
from pylons.i18n.translation import _
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP, safe_str
from rhodecode.lib.celerylib import run_task, locked_task, str2bool, \
__get_lockkey, LockHeld, DaemonLock
from rhodecode.lib.helpers import person
from rhodecode.lib.smtp_mailer import SmtpMailer
from rhodecode.lib.utils import add_cache
from rhodecode.lib.compat import json, OrderedDict
from rhodecode.model import init_model
from rhodecode.model import meta
from rhodecode.model.db import RhodeCodeUi, Statistics, Repository
from vcs.backends import get_repo
from sqlalchemy import engine_from_config
add_cache(config)
__all__ = ['whoosh_index', 'get_commits_stats',
'reset_user_password', 'send_email']
def get_session():
engine = engine_from_config(config, 'sqlalchemy.db1.')
init_model(engine)
sa = meta.Session()
return sa
def get_repos_path():
sa = get_session()
q = sa.query(RhodeCodeUi).filter(RhodeCodeUi.ui_key == '/').one()
return q.ui_value
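
# The '/' ui key in RhodeCodeUi holds the base path under which all
# repositories live; tasks use it to resolve a repository name into a
# filesystem path.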
@task(ignore_result=True)
@locked_task
def whoosh_index(repo_location, full_index):
#log = whoosh_index.get_logger()
from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
index_location = config['index_dir']
WhooshIndexingDaemon(index_location=index_location,
repo_location=repo_location, sa=get_session())\
.run(full_index=full_index)
def get_commits_stats(repo_name, ts_min_y, ts_max_y):
    try:
        log = get_commits_stats.get_logger()
    except:
        log = logging.getLogger(__name__)

    lockkey = __get_lockkey('get_commits_stats', repo_name, ts_min_y,
                            ts_max_y)
    lockkey_path = dn(dn(dn(dn(os.path.abspath(__file__)))))

    lock = l = DaemonLock(file_=jn(lockkey_path, lockkey))
    #for js data compatibility cleans the key for person from '
akc = lambda k: person(k).replace('"', "")
co_day_auth_aggr = {}
commits_by_day_aggregate = {}
repos_path = get_repos_path()
repo = get_repo(safe_str(os.path.join(repos_path, repo_name)))
repo_size = len(repo.revisions)
    #return if repo has no revisions
if repo_size < 1:
lock.release()
return True
skip_date_limit = True
parse_limit = int(config['app_conf'].get('commit_parse_limit'))
last_rev = 0
last_cs = None
timegetter = itemgetter('time')
    sa = get_session()
    dbrepo = sa.query(Repository)\
        .filter(Repository.repo_name == repo_name).scalar()
cur_stats = sa.query(Statistics)\
.filter(Statistics.repository == dbrepo).scalar()
if cur_stats is not None:
last_rev = cur_stats.stat_on_revision
    if last_rev == repo.get_changeset().revision and repo_size > 1:
        #pass silently without any work if we're not on first revision or
        #the current state of parsing revisions (from db marker) is already
        #the last revision
        lock.release()
        return True
if cur_stats:
commits_by_day_aggregate = OrderedDict(json.loads(
cur_stats.commit_activity_combined))
co_day_auth_aggr = json.loads(cur_stats.commit_activity)
log.debug('starting parsing %s', parse_limit)
lmktime = mktime
last_rev = last_rev + 1 if last_rev > 0 else last_rev
for cs in repo[last_rev:last_rev + parse_limit]:
last_cs = cs # remember last parsed changeset
k = lmktime([cs.date.timetuple()[0], cs.date.timetuple()[1],
cs.date.timetuple()[2], 0, 0, 0, 0, 0, 0])
            if akc(cs.author) in co_day_auth_aggr:
                try:
                    l = [timegetter(x) for x in
                         co_day_auth_aggr[akc(cs.author)]['data']]
                    time_pos = l.index(k)
                except ValueError:
                    time_pos = False

                if time_pos >= 0 and time_pos is not False:
                    datadict = \
                        co_day_auth_aggr[akc(cs.author)]['data'][time_pos]

                    datadict["commits"] += 1
                    datadict["added"] += len(cs.added)
                    datadict["changed"] += len(cs.changed)
                    datadict["removed"] += len(cs.removed)

                else:
                    if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                        datadict = {"time": k,
                                    "commits": 1,
                                    "added": len(cs.added),
                                    "changed": len(cs.changed),
                                    "removed": len(cs.removed),
                                    }
                        co_day_auth_aggr[akc(cs.author)]['data']\
                            .append(datadict)

            else:
                if k >= ts_min_y and k <= ts_max_y or skip_date_limit:
                    co_day_auth_aggr[akc(cs.author)] = {
                        "label": akc(cs.author),
                        "data": [{"time": k,
                                  "commits": 1,
                                  "added": len(cs.added),
                                  "changed": len(cs.changed),
                                  "removed": len(cs.removed),
                                  }],
                        "schema": ["commits"],
                    }
#gather all data by day
:created_on: Aug 17, 2010
:copyright: (C) 2009-2010 Marcin Kuzminski <marcin@python-works.com>
#to get the rhodecode import
sys.path.append(dn(dn(dn(os.path.realpath(__file__)))))
from string import strip
from shutil import rmtree
from whoosh.analysis import RegexTokenizer, LowercaseFilter, StopFilter
from whoosh.fields import TEXT, ID, STORED, Schema, FieldType
from whoosh.index import create_in, open_dir
from whoosh.formats import Characters
from whoosh.highlight import highlight, SimpleFragmenter, HtmlFormatter
from webhelpers.html.builder import escape
from rhodecode.model.scm import ScmModel
from rhodecode.model.repo import RepoModel
from rhodecode.config.environment import load_environment
from rhodecode.lib import LANGUAGES_EXTENSIONS_MAP
from rhodecode.lib.utils import BasePasterCommand, Command, add_cache
#EXTENSIONS WE WANT TO INDEX CONTENT OF
INDEX_EXTENSIONS = LANGUAGES_EXTENSIONS_MAP.keys()
#CUSTOM ANALYZER wordsplit + lowercase filter
ANALYZER = RegexTokenizer(expression=r"\w+") | LowercaseFilter()
#INDEX SCHEMA DEFINITION
SCHEMA = Schema(owner=TEXT(),
repository=TEXT(stored=True),
path=TEXT(stored=True),
content=FieldType(format=Characters(ANALYZER),
scorable=True, stored=True),
modtime=STORED(), extension=TEXT(stored=True))
IDX_NAME = 'HG_INDEX'
FORMATTER = HtmlFormatter('span', between='\n<span class="break">...</span>\n')
FRAGMENTER = SimpleFragmenter(200)
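
# Search hits are cut into ~200 character fragments (FRAGMENTER), matched
# terms are wrapped in <span> tags and fragments are joined with a
# '<span class="break">...</span>' separator (FORMATTER).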
class MakeIndex(BasePasterCommand):
max_args = 1
min_args = 1
usage = "CONFIG_FILE"
summary = "Creates index for full text search given configuration file"
group_name = "RhodeCode"
takes_config_file = -1
parser = Command.standard_parser(verbose=True)
def command(self):
repo_location = self.options.repo_location \
if self.options.repo_location else RepoModel().repos_path
repo_list = map(strip, self.options.repo_list.split(',')) \
if self.options.repo_list else None
#======================================================================
# WHOOSH DAEMON
        from rhodecode.lib.pidlock import LockHeld, DaemonLock
        from rhodecode.lib.indexers.daemon import WhooshIndexingDaemon
        try:
            l = DaemonLock(file_=jn(dn(dn(index_location)), 'make_index.lock'))
            WhooshIndexingDaemon(index_location=index_location,
                                 repo_location=repo_location,
                                 repo_list=repo_list)\
                .run(full_index=self.options.full_index)
            l.release()
        except LockHeld:
            sys.exit(1)
def update_parser(self):
self.parser.add_option('--repo-location',
action='store',
dest='repo_location',
help="Specifies repositories location to index OPTIONAL",
)
        self.parser.add_option('--index-only',
                               action='store',
                               dest='repo_list',
                               help="Specifies a comma separated list of repositories "
                                    "to build index on OPTIONAL")
self.parser.add_option('-f',
action='store_true',
dest='full_index',
help="Specifies that index should be made full i.e"
" destroy old and build from scratch",
default=False)
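
# Illustrative invocation (the ``make-index`` command name is an assumption,
# i.e. whatever name this class is registered under in setup.py entry points):
#
#   paster make-index production.ini --repo-location=/srv/repos \
#       --index-only=repo1,repo2 -f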
class ResultWrapper(object):
    # whoosh search results wrapper (distinct from the celerylib ResultWrapper above)
    def __init__(self, search_type, searcher, matcher, highlight_items):
        self.search_type = search_type
        self.searcher = searcher
        self.matcher = matcher
        self.highlight_items = highlight_items
        self.fragment_size = 200 / 2
    @LazyProperty
    def doc_ids(self):
docs_id = []
while self.matcher.is_active():
docnum = self.matcher.id()
chunks = [offsets for offsets in self.get_chunks()]
docs_id.append([docnum, chunks])
self.matcher.next()
return docs_id
def __str__(self):
return '<%s at %s>' % (self.__class__.__name__, len(self.doc_ids))
def __repr__(self):
return self.__str__()
def __len__(self):
return len(self.doc_ids)
def __iter__(self):
        """
        Allows iteration over results, and lazily generates content.

        *Requires* implementation of ``__getitem__`` method.
        """
for docid in self.doc_ids:
yield self.get_full_content(docid)
def __getitem__(self, key):
        """
        Slicing of resultWrapper
        """
i, j = key.start, key.stop
slice = []
for docid in self.doc_ids[i:j]:
slice.append(self.get_full_content(docid))
return slice
def get_full_content(self, docid):
res = self.searcher.stored_fields(docid[0])
f_path = res['path'][res['path'].find(res['repository']) \
+ len(res['repository']):].lstrip('/')
content_short = self.get_short_content(res, docid[1])
res.update({'content_short':content_short,
'content_short_hl':self.highlight(content_short),
'f_path':f_path})
return res
def get_short_content(self, res, chunks):
return ''.join([res['content'][chunk[0]:chunk[1]] for chunk in chunks])
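
    # ``chunks`` is the list of (start, end) offset pairs produced by
    # get_chunks(); the short content is those slices of the stored document
    # content joined together.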
def get_chunks(self):
        """
        Smart function that implements chunking of the content,
        but does not overlap chunks so it doesn't highlight the same
        close occurrences twice.
        """
import time
import errno
from warnings import warn
from multiprocessing.util import Finalize
from rhodecode import __platform__, PLATFORM_WIN
if __platform__ in PLATFORM_WIN:
import ctypes
def kill(pid, sig):
"""kill function for Win32"""
kernel32 = ctypes.windll.kernel32
handle = kernel32.OpenProcess(1, 0, pid)
return (0 != kernel32.TerminateProcess(handle, 0))
else:
    kill = os.kill
class LockHeld(Exception):
pass
class DaemonLock(object):
"""daemon locking
USAGE:
l = DaemonLock(desc='test lock')
l = DaemonLock(file_='/path/tolockfile',desc='test lock')
main()
    def __init__(self, file_=None, callbackfn=None,
                 desc='daemon lock', debug=False):

        self.pidfile = file_ if file_ else os.path.join(
                                                    os.path.dirname(__file__),
                                                    'running.lock')
self.callbackfn = callbackfn
self.desc = desc
self.debug = debug
self.held = False
#run the lock automatically !
self.lock()
self._finalize = Finalize(self, DaemonLock._on_finalize,
args=(self, debug), exitpriority=10)
@staticmethod
    def _on_finalize(lock, debug):
        if lock.held:
            if debug:
                print 'lock held, finalizing and running lock.release()'
            lock.release()
def lock(self):
        """
        locking function, if lock is present it
        will raise LockHeld exception
        """
lockname = '%s' % (os.getpid())
if self.debug:
print 'running lock'
self.trylock()
self.makelock(lockname, self.pidfile)
    def trylock(self):
        running_pid = False
        print 'checking for already running process'
        try:
            pidfile = open(self.pidfile, "r")
            pidfile.seek(0)
            running_pid = int(pidfile.readline())
            pidfile.close()

            print ('lock file present running_pid: %s, '
                   'checking for execution') % running_pid
            # check whether the process with the PID from the lock file
            # is still running
            if running_pid:
                try:
                    kill(running_pid, 0)
                except OSError, exc:
                    if exc.errno in (errno.ESRCH, errno.EPERM):
                        print ("Lock File is there but"
                               " the program is not running")
                        print "Removing lock file for the: %s" % running_pid
                        self.release()
                    else:
                        raise
                else:
                    print "You already have an instance of the program running"
                    print "It is running as process %s" % running_pid
                    raise LockHeld()
        except IOError, e:
            if e.errno != 2:
                raise
    def release(self):
        """releases the pid by removing the pidfile
        """
        print 'trying to release the pidlock'

        if self.callbackfn:
            #execute callback function on release
            print 'executing callback function %s' % self.callbackfn
            self.callbackfn()
        try:
            print 'removing pidfile %s' % self.pidfile
            os.remove(self.pidfile)
            self.held = False
        except OSError, e:
            print 'removing pidfile failed %s' % e
    def makelock(self, lockname, pidfile):
        """
        this function will make an actual lock

        :param lockname: actual pid of file
        :param pidfile: the file to write the pid in
        """
        print 'creating a file %s and pid: %s' % (pidfile, lockname)
        pidfile = open(self.pidfile, "wb")
        pidfile.write(lockname)
        pidfile.close()
        self.held = True
def repo2db_mapper(initial_repo_list, remove_obsolete=False):
group = map_groups(name.split(os.sep))
if not rm.get_by_repo_name(name, cache=False):
        log.info('repository %s not found, creating default', name)
added.append(name)
        form_data = {
            'repo_name': name,
            'repo_name_full': name,
            'repo_type': repo.alias,
            'description': repo.description \
                if repo.description != 'unknown' else '%s repository' % name,
            'private': False,
            'group_id': getattr(group, 'group_id', None)
        }
        rm.create(form_data, user, just_db=True)
removed = []
if remove_obsolete:
#remove from database those repositories that are not in the filesystem
for repo in sa.query(Repository).all():
if repo.repo_name not in initial_repo_list.keys():
removed.append(repo.repo_name)
sa.delete(repo)
sa.commit()
return added, removed
#set cache regions for beaker so celery can utilise it
def add_cache(settings):
cache_settings = {'regions': None}
for key in settings.keys():
for prefix in ['beaker.cache.', 'cache.']:
if key.startswith(prefix):
name = key.split(prefix)[1].strip()
cache_settings[name] = settings[key].strip()
if cache_settings['regions']:
for region in cache_settings['regions'].split(','):
region = region.strip()
region_settings = {}
for key, value in cache_settings.items():
if key.startswith(region):
region_settings[key.split('.')[1]] = value
region_settings['expire'] = int(region_settings.get('expire',
60))
region_settings.setdefault('lock_dir',
cache_settings.get('lock_dir'))
region_settings.setdefault('data_dir',
cache_settings.get('data_dir'))
if 'type' not in region_settings:
region_settings['type'] = cache_settings.get('type',
'memory')
beaker.cache.cache_regions[region] = region_settings
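
# Illustrative .ini snippet that add_cache() understands (region names and
# values are examples only):
#
#   beaker.cache.regions = short_term, long_term
#   beaker.cache.short_term.type = memory
#   beaker.cache.short_term.expire = 3600
#   beaker.cache.long_term.type = file
#   beaker.cache.long_term.expire = 36000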
def get_current_revision():
"""Returns tuple of (number, id) from repository containing this package
or None if repository could not be found.
from vcs import get_repo
from vcs.utils.helpers import get_scm
from vcs.exceptions import RepositoryError, VCSError
repopath = os.path.join(os.path.dirname(__file__), '..', '..')
scm = get_scm(repopath)[0]
repo = get_repo(path=repopath, alias=scm)
tip = repo.get_changeset()
return (tip.revision, tip.short_id)
except (ImportError, RepositoryError, VCSError), err:
logging.debug("Cannot retrieve rhodecode's revision. Original error "
"was: %s" % err)
return None
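
# e.g. get_current_revision() -> (1234, '1e01b5a70b09') when run from a
# version-controlled working copy (values purely illustrative); None otherwise.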
#==============================================================================
# TEST FUNCTIONS AND CREATORS
def create_test_index(repo_location, config, full_index):
    """
    Makes default test index

    :param config: test config
    :param full_index:
    """
    repo_location = repo_location
    index_location = os.path.join(config['app_conf']['index_dir'])
    if not os.path.exists(index_location):
        os.makedirs(index_location)

    try:
        l = DaemonLock(file_=jn(dn(index_location), 'make_index.lock'))
        WhooshIndexingDaemon(index_location=index_location,
                             repo_location=repo_location)\
            .run(full_index=full_index)
        l.release()
    except LockHeld:
        pass
def create_test_env(repos_test_path, config):
"""Makes a fresh database and
install test repository into tmp dir
from rhodecode.lib.db_manage import DbManage
from rhodecode.tests import HG_REPO, GIT_REPO, NEW_HG_REPO, NEW_GIT_REPO, \
HG_FORK, GIT_FORK, TESTS_TMP_PATH
import tarfile
import shutil
from os.path import abspath
# PART ONE create db
dbconf = config['sqlalchemy.db1.url']
log.debug('making test db %s', dbconf)
# create test dir if it doesn't exist
if not os.path.isdir(repos_test_path):
log.debug('Creating testdir %s' % repos_test_path)
os.makedirs(repos_test_path)
dbmanage = DbManage(log_sql=True, dbconf=dbconf, root=config['here'],
tests=True)
dbmanage.create_tables(override=True)
dbmanage.create_settings(dbmanage.config_prompt(repos_test_path))
dbmanage.create_default_user()
dbmanage.admin_prompt()
dbmanage.create_permissions()
dbmanage.populate_default_permissions()
# PART TWO make test repo
log.debug('making test vcs repositories')
idx_path = config['app_conf']['index_dir']
data_path = config['app_conf']['cache_dir']
#clean index and data
if idx_path and os.path.exists(idx_path):
log.debug('remove %s' % idx_path)
shutil.rmtree(idx_path)
if data_path and os.path.exists(data_path):
log.debug('remove %s' % data_path)
shutil.rmtree(data_path)
#CREATE DEFAULT HG REPOSITORY
cur_dir = dn(dn(abspath(__file__)))
tar = tarfile.open(jn(cur_dir, 'tests', "vcs_test_hg.tar.gz"))
tar.extractall(jn(TESTS_TMP_PATH, HG_REPO))
tar.close()
# PASTER COMMANDS
class BasePasterCommand(Command):
    """
    Abstract Base Class for paster commands.

    The celery commands are somewhat aggressive about loading
    celery.conf, and since our module sets the `CELERY_LOADER`
    environment variable to our loader, we have to bootstrap a bit and
    make sure we've had a chance to load the pylons config off of the
    command line, otherwise everything fails.
    """
min_args_error = "Please provide a paster config file as an argument."
takes_config_file = 1
requires_config_file = True
    def notify_msg(self, msg, log=False):
        """Make a notification to user, additionally if logger is passed
        it logs this action using given logger

        :param msg: message that will be printed to user
        :param log: logging instance, to use to additionally log this message
        """
        if log and isinstance(log, logging.Logger):
            log.info(msg)

        print msg
def run(self, args):
        """
        Overrides Command.run

        Checks for a config file argument and loads it.
        """
if len(args) < self.min_args:
raise BadCommand(