@@ -32,24 +32,25 @@ import traceback
from shutil import rmtree
from time import mktime
from os.path import dirname as dn
from os.path import join as jn
# add the project path to sys.path so the rhodecode imports below resolve
project_path = dn(dn(dn(dn(os.path.realpath(__file__)))))
sys.path.append(project_path)
from rhodecode.config.conf import INDEX_EXTENSIONS
from rhodecode.model.scm import ScmModel
from rhodecode.model.db import Repository
from rhodecode.lib.utils2 import safe_unicode, safe_str
from rhodecode.lib.indexers import SCHEMA, IDX_NAME, CHGSETS_SCHEMA, \
CHGSET_IDX_NAME
from rhodecode.lib.vcs.exceptions import ChangesetError, RepositoryError, \
NodeDoesNotExistError
from whoosh.index import create_in, open_dir, exists_in
from whoosh.query import *
from whoosh.qparser import QueryParser
log = logging.getLogger('whoosh_indexer')
@@ -89,55 +90,68 @@ class WhooshIndexingDaemon(object):
#filter update repo list
self.filtered_repo_update_paths = {}
if repo_update_list:
for repo_name, repo in self.repo_paths.items():
if repo_name in repo_update_list:
self.filtered_repo_update_paths[repo_name] = repo
self.repo_paths = self.filtered_repo_update_paths
self.initial = True
if not os.path.isdir(self.index_location):
os.makedirs(self.index_location)
log.info('Cannot run incremental index since it does not'
' yet exist running full build')
log.info('Cannot run incremental index since it does not '
'yet exist running full build')
elif not exists_in(self.index_location, IDX_NAME):
log.info('Running full index build as the file content'
' index does not exist')
log.info('Running full index build as the file content '
'index does not exist')
elif not exists_in(self.index_location, CHGSET_IDX_NAME):
log.info('Running full index build as the changeset'
log.info('Running full index build as the changeset '
else:
self.initial = False
def _get_index_revision(self, repo):
    """
    Pick the name of the revision that should be indexed for ``repo``.

    The repository is looked up in the database by ``repo.name``; when a
    record is found its ``landing_rev`` is used, otherwise ``'tip'``.

    :param repo: instance of vcs repo, only ``repo.name`` is read here
    :return: ``landing_rev`` of the database record, or ``'tip'`` when
        the lookup gives nothing
    """
    db_record = Repository.get_by_repo_name(repo.name)
    if not db_record:
        # no database entry for this repository, fall back to the tip
        return 'tip'
    return db_record.landing_rev
def _get_index_changeset(self, repo):
    """
    Fetch the changeset of ``repo`` that the index is built from.

    :param repo: instance of vcs repo
    :return: whatever ``repo.get_changeset`` returns for the revision
        reported by ``_get_index_revision``
    """
    return repo.get_changeset(self._get_index_revision(repo))
def get_paths(self, repo):
    """
    Recursively walk the repository root and return the set of all file
    paths found there, based on the repository walk function.

    The walk runs on the changeset returned by ``_get_index_changeset``
    so the collected paths belong to the revision that is being indexed.

    :param repo: instance of vcs repo
    :return: set of paths, each one being ``repo.path`` joined with the
        path of a file node; if the walk raises ``RepositoryError`` the
        paths collected so far are returned
    """
    index_paths_ = set()
    try:
        cs = self._get_index_changeset(repo)
        for _topnode, _dirs, files in cs.walk('/'):
            for f in files:
                index_paths_.add(jn(safe_str(repo.path), safe_str(f.path)))
    except RepositoryError:
        # best effort: record the traceback and return what we have
        log.debug(traceback.format_exc())
    return index_paths_
def get_node(self, repo, path):
    """
    Return the node stored under ``path`` in the changeset being indexed.

    :param repo: instance of vcs repo
    :param path: path that starts with ``repo.path`` followed by one
        separator character (the form built by ``get_paths``)
    :return: node returned by ``get_node`` of the changeset obtained from
        ``_get_index_changeset``
    """
    # drop the leading "<repo.path>" plus one character to get the path
    # as it is known inside the repository
    n_path = path[len(repo.path) + 1:]
    # look the node up in the same changeset get_paths walks, not in the
    # repository default one
    cs = self._get_index_changeset(repo)
    return cs.get_node(n_path)
def get_node_mtime(self, node):
    """
    Return the date of the node's last changeset as a ``time.mktime``
    timestamp.

    :param node: object exposing ``last_changeset.date`` with a
        ``timetuple()`` method
    :return: value of ``mktime`` for that date's time tuple
    """
    last_change_date = node.last_changeset.date
    return mktime(last_change_date.timetuple())
def add_doc(self, writer, path, repo, repo_name):
Adding doc to writer this function itself fetches data from
the instance of vcs backend
node = self.get_node(repo, path)
@@ -213,25 +227,26 @@ class WhooshIndexingDaemon(object):
log.debug('indexed %d changesets for repo %s' % (indexed, repo_name))
return indexed
def index_files(self, file_idx_writer, repo_name, repo):
    """
    Index files for given repo_name

    :param file_idx_writer: the whoosh index writer to add to
    :param repo_name: name of the repository we're indexing
    :param repo: instance of vcs repo
    :return: tuple ``(i_cnt, iwc_cnt)`` holding the sums of the two
        counters ``add_doc`` returns for every path from ``get_paths``
    """
    i_cnt = iwc_cnt = 0
    # single start message that also names the revision being indexed
    log.debug('building index for %s @revision:%s',
              repo.path, self._get_index_revision(repo))

    for idx_path in self.get_paths(repo):
        i, iwc = self.add_doc(file_idx_writer, idx_path, repo, repo_name)
        i_cnt += i
        iwc_cnt += iwc

    log.debug('added %s files %s with content for repo %s',
              i_cnt + iwc_cnt, iwc_cnt, repo.path)
    return i_cnt, iwc_cnt
def update_changeset_index(self):
idx = open_dir(self.index_location, indexname=CHGSET_IDX_NAME)
@@ -267,33 +282,32 @@ class WhooshIndexingDaemon(object):
# delete the docs in the index for the previous
# last changeset(s)
for hit in results:
q = qp.parse(u"last:t AND %s AND raw_id:%s" %
(repo_name, hit['raw_id']))
writer.delete_by_query(q)
# index from the previous last changeset + all new ones
indexed_total += self.index_changesets(writer,
repo_name, repo, start_id)
writer_is_dirty = True
log.debug('indexed %s changesets for repo %s' % (
indexed_total, repo_name)
)
finally:
if writer_is_dirty:
log.debug('>> COMMITING CHANGES TO CHANGESET INDEX<<')
writer.commit(merge=True)
log.debug('>>> FINISHED REBUILDING CHANGESET INDEX <<<')
writer.cancel
log.debug('>> NOTHING TO COMMIT TO CHANGESET INDEX<<')
def update_file_index(self):
log.debug((u'STARTING INCREMENTAL INDEXING UPDATE FOR EXTENSIONS %s '
'AND REPOS %s') % (INDEX_EXTENSIONS, self.repo_paths.keys()))
idx = open_dir(self.index_location, indexname=self.indexname)
# The set of all paths in the index
indexed_paths = set()
# The set of all paths we need to re-index
to_index = set()
Status change: