@@ -125,98 +125,102 @@ class WhooshIndexingDaemon(object):
index_rev = self._get_index_revision(repo)
cs = repo.get_changeset(index_rev)
return cs
def get_paths(self, repo):
    """
    Walk the index changeset of ``repo`` from the root ('/') and return
    the set of all file paths found, each joined onto ``repo.path``.

    :param repo: scm repo instance
    :return: set of full paths (repository location + path inside the repo)

    A RepositoryError raised while fetching or walking the changeset is
    logged at debug level and the paths collected so far are returned.
    """
    index_paths_ = set()
    try:
        cs = self._get_index_changeset(repo)
        # the repository location is the same for every file - convert it once
        repo_path = safe_str(repo.path)
        for _topnode, _dirs, files in cs.walk('/'):
            for f in files:
                index_paths_.add(jn(repo_path, safe_str(f.path)))
    except RepositoryError:
        # best effort: keep whatever was gathered before the failure
        log.debug(traceback.format_exc())
    return index_paths_
def get_node(self, repo, path, index_rev=None):
    """
    Get a file node based on the given full path. It operates on strings
    for hg / git compatibility.

    :param repo: scm repo instance
    :param path: full path including root location
    :param index_rev: revision handed to ``_get_index_changeset``
    :return: the node returned by the changeset's ``get_node``

    An AssertionError is raised when ``path`` does not start with
    ``repo.path`` or when the character right after that prefix is not a
    path separator. A ``path`` equal to ``repo.path`` raises IndexError.
    """
    # FIXME: paths should be normalized ... or even better: don't include repo.path
    path = safe_str(path)
    repo_path = safe_str(repo.path)
    prefix_len = len(repo_path)
    assert path.startswith(repo_path), \
        'path %r is not located below repository path %r' % (path, repo_path)
    assert path[prefix_len] in (os.path.sep, os.path.altsep), \
        'path %r has no separator after repository path %r' % (path, repo_path)
    # strip the repository location and the separator that follows it
    node_path = path[prefix_len + 1:]
    cs = self._get_index_changeset(repo, index_rev=index_rev)
    node = cs.get_node(node_path)
    return node
def get_node_mtime(self, node):
    """
    Return the modification time of ``node``: the date of the node's last
    changeset, converted from its time tuple with ``mktime``.

    :param node: node exposing ``last_changeset.date``
    """
    last_change_date = node.last_changeset.date
    time_tuple = last_change_date.timetuple()
    return mktime(time_tuple)
def add_doc(self, writer, path, repo, repo_name, index_rev=None):
    """
    Add the document for ``path`` to ``writer``. This function itself
    fetches the data from the instance of the vcs backend.

    :param writer: index writer; ``add_document`` is called on it once
    :param path: full path including root location
    :param repo: scm repo instance
    :param repo_name: repository name, stored in the ``repository`` field
    :param index_rev: revision handed to ``get_node``
    :return: tuple ``(indexed, indexed_w_content)``: ``(1, 0)`` when only
        the file name was indexed, ``(0, 1)`` when the content was indexed
        too, ``(0, 0)`` when the node could not be fetched or its content
        was not unicode
    """
    try:
        node = self.get_node(repo, path, index_rev)
    except (ChangesetError, NodeDoesNotExistError):
        log.debug("couldn't add doc - %s did not have %r at %s", repo, path, index_rev)
        return 0, 0

    indexed = indexed_w_content = 0
    # we just index the content of chosen files, and skip binary files
    if node.extension in INDEX_EXTENSIONS and not node.is_binary:
        u_content = node.content
        if not isinstance(u_content, unicode):
            # lazy logger args: the message is only formatted when emitted
            log.warning(' >> %s Could not get this content as unicode '
                        'replacing with empty content', path)
            u_content = u''
        else:
            log.debug(' >> %s [WITH CONTENT]', path)
            indexed_w_content += 1
    else:
        log.debug(' >> %s', path)
        # just index file name without its content; u_content must still be
        # bound because it is passed to add_document below
        u_content = u''
        indexed += 1

    p = safe_unicode(path)
    writer.add_document(
        fileid=p,
        owner=unicode(repo.contact),
        repository=safe_unicode(repo_name),
        path=p,
        content=u_content,
        modtime=self.get_node_mtime(node),
        extension=node.extension
    )
    return indexed, indexed_w_content
def index_changesets(self, writer, repo_name, repo, start_rev=None):
Add all changeset in the vcs repo starting at start_rev
to the index writer
:param writer: the whoosh index writer to add to
:param repo_name: name of the repository from whence the
changeset originates including the repository group
:param repo: the vcs repository instance to index changesets for,
the presumption is the repo has changesets to index
:param start_rev=None: the full sha id to start indexing from
if start_rev is None then index from the first changeset in
the repo
if start_rev is None:
start_rev = repo[0].raw_id
Status change: