diff --git a/rhodecode/lib/indexers/daemon.py b/rhodecode/lib/indexers/daemon.py --- a/rhodecode/lib/indexers/daemon.py +++ b/rhodecode/lib/indexers/daemon.py @@ -1,8 +1,15 @@ -#!/usr/bin/env python -# encoding: utf-8 -# whoosh indexer daemon for rhodecode -# Copyright (C) 2009-2010 Marcin Kuzminski -# +# -*- coding: utf-8 -*- +""" + rhodecode.lib.indexers.daemon + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + A daemon will read from task table and run tasks + + :created_on: Jan 26, 2010 + :author: marcink + :copyright: (C) 2009-2011 Marcin Kuzminski + :license: GPLv3, see COPYING for more details. +""" # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or @@ -15,14 +22,15 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -""" -Created on Jan 26, 2010 -@author: marcink -A deamon will read from task table and run tasks -""" +import os import sys -import os +import logging +import traceback + +from shutil import rmtree +from time import mktime + from os.path import dirname as dn from os.path import join as jn @@ -32,15 +40,14 @@ sys.path.append(project_path) from rhodecode.model.scm import ScmModel -from rhodecode.lib.helpers import safe_unicode -from whoosh.index import create_in, open_dir -from shutil import rmtree +from rhodecode.lib import safe_unicode from rhodecode.lib.indexers import INDEX_EXTENSIONS, SCHEMA, IDX_NAME -from time import mktime from vcs.exceptions import ChangesetError, RepositoryError -import logging +from whoosh.index import create_in, open_dir + + log = logging.getLogger('whooshIndexer') # create logger @@ -51,7 +58,8 @@ ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) # create formatter -formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +formatter = logging.Formatter("%(asctime)s - %(name)s -" + " %(levelname)s - %(message)s") # add formatter to ch ch.setFormatter(formatter) @@ -61,11 +69,11 @@ log.addHandler(ch) class WhooshIndexingDaemon(object): """ - Deamon for atomic jobs + Daemon for atomic jobs """ def __init__(self, indexname='HG_INDEX', index_location=None, - repo_location=None, sa=None): + repo_location=None, sa=None, repo_list=None): self.indexname = indexname self.index_location = index_location @@ -76,7 +84,17 @@ class WhooshIndexingDaemon(object): if not repo_location: raise Exception('You have to provide repositories location') - self.repo_paths = ScmModel(sa).repo_scan(self.repo_location, None) + self.repo_paths = ScmModel(sa).repo_scan(self.repo_location) + + if repo_list: + filtered_repo_paths = {} + for repo_name, repo in self.repo_paths.items(): + if repo_name in repo_list: + filtered_repo_paths[repo_name] = repo + + self.repo_paths = filtered_repo_paths + + self.initial = False if not os.path.isdir(self.index_location): os.makedirs(self.index_location) @@ -90,14 +108,13 @@ class WhooshIndexingDaemon(object): """ index_paths_ = set() try: - for topnode, dirs, files in repo.walk('/', 'tip'): + tip = repo.get_changeset('tip') + for topnode, dirs, files in tip.walk('/'): for f in files: index_paths_.add(jn(repo.path, f.path)) - for dir in dirs: - for f in files: - index_paths_.add(jn(repo.path, f.path)) - except RepositoryError: + except RepositoryError, e: + log.debug(traceback.format_exc()) pass return index_paths_ @@ -109,14 +126,14 @@ class WhooshIndexingDaemon(object): def get_node_mtime(self, node): return mktime(node.last_changeset.date.timetuple()) - def add_doc(self, writer, path, repo): + def add_doc(self, writer, path, repo, repo_name): """Adding doc to writer this function itself fetches data from the instance of vcs backend""" node = self.get_node(repo, path) #we just index the content of chosen files, and skip binary files if node.extension in INDEX_EXTENSIONS and not node.is_binary: - + u_content = node.content if not isinstance(u_content, unicode): log.warning(' >> %s Could not get this content as unicode ' @@ -124,14 +141,14 @@ class WhooshIndexingDaemon(object): u_content = u'' else: log.debug(' >> %s [WITH CONTENT]' % path) - + else: log.debug(' >> %s' % path) #just index file name without it's content u_content = u'' writer.add_document(owner=unicode(repo.contact), - repository=safe_unicode(repo.name), + repository=safe_unicode(repo_name), path=safe_unicode(path), content=u_content, modtime=self.get_node_mtime(node), @@ -149,11 +166,11 @@ class WhooshIndexingDaemon(object): idx = create_in(self.index_location, SCHEMA, indexname=IDX_NAME) writer = idx.writer() - for cnt, repo in enumerate(self.repo_paths.values()): + for repo_name, repo in self.repo_paths.items(): log.debug('building index @ %s' % repo.path) for idx_path in self.get_paths(repo): - self.add_doc(writer, idx_path, repo) + self.add_doc(writer, idx_path, repo, repo_name) log.debug('>> COMMITING CHANGES <<') writer.commit(merge=True) @@ -200,12 +217,12 @@ class WhooshIndexingDaemon(object): # Loop over the files in the filesystem # Assume we have a function that gathers the filenames of the # documents to be indexed - for repo in self.repo_paths.values(): + for repo_name, repo in self.repo_paths.items(): for path in self.get_paths(repo): if path in to_index or path not in indexed_paths: # This is either a file that's changed, or a new file # that wasn't indexed before. So index it! - self.add_doc(writer, path, repo) + self.add_doc(writer, path, repo, repo_name) log.debug('re indexing %s' % path) log.debug('>> COMMITING CHANGES <<')