1 # -*- coding: utf-8 -*-
8 import cPickle as pickle
9 from fnmatch import fnmatch
10 from trac.versioncontrol.api import Changeset, Node
11 from trac.util.text import to_unicode
16 from sets import Set as set
# Splitter for comma-separated option values: a comma followed by any amount
# of whitespace separates two items.
18 _split_args = re.compile(r',\s*').split
# NOTE(review): the two lines below appear to belong to a helper whose `def`
# line is not visible in this view (presumably the `split_args` function that
# TagIndexer.__init__ calls) -- confirm against the full file.
# Splitting an empty string with the pattern above yields [''], which is the
# case tested for here.
22 # Make sure that empty strings result in empty lists
23 if len(l) == 1 and l[0] == "":
29 class TagIndexer(object):
31 def __init__(self, env, repo):
# Reads the indexer settings from the 'code-tags' section of the Trac
# configuration, builds the tag-matching regex and computes the cache folder.
# NOTE(review): some statements of this method are not visible in this view
# (e.g. the initialisation of `p` and what happens when the cache folder is
# missing) -- confirm against the full file.
#
# `c` reads option `x` (default `d`) and splits the value with split_args.
34 c = lambda x, d: split_args(env.config.get('code-tags', x, d))
# Tag words to look for in file contents.
35 self.tags = c('tags', 'XXX, TODO, FIXME')
# fnmatch patterns: folders to scan, folders to exclude, files to scan.
36 self.scan_folders = c('scan_folders', '*')
37 self.exclude_folders = c('exclude_folders', '')
38 self.scan_files = c('scan_files', '*')
# When true, file contents are passed through to_unicode before scanning.
39 self.enable_unicode = env.config.getbool('code-tags', 'enable_unicode', True)
# Each tag is regex-escaped and wrapped in word boundaries so that it only
# matches as a whole word.
42 for word in self.tags:
43 p.append(r'\b' + re.escape(word) + r'\b')
# Group 1 is the tag itself; after an optional colon and whitespace, group 2
# lazily captures the rest of the line without trailing whitespace.
44 self.tag_re = re.compile(r'(%s)\:?\s*(.*?)\s*$' % '|'.join(p))
# The cache folder is <absolute env.path>/cache/codetags.
46 cdir = os.path.join(os.path.abspath(env.path), 'cache', 'codetags')
47 if not os.path.exists(cdir):
51 def is_path_valid(self, path):
52 """Determine whether the given path is valid path to scan."""
# `path` is compared against every fnmatch pattern in self.scan_folders.
# NOTE(review): the return statements are not visible in this view;
# presumably the first matching rule makes the path valid -- confirm.
53 for rule in self.scan_folders:
54 if fnmatch(path, rule):
57 def contains_valid_paths(self, node):
# Checks whether `node` lies on the way to (or is) a folder that should be
# scanned. Only directory nodes go through the rule check below.
# NOTE(review): the return statements are not visible in this view --
# confirm the exact results against the full file.
58 if node.kind != Node.DIRECTORY:
61 # Check whether the given node is a parent directory of the folders to scan
62 for rule in self.scan_folders:
# Every leading prefix of the rule (first component, first two components,
# ... up to the whole rule) is matched against the node's path.
63 subdirs = rule.split('/')
64 for depth in range(0, len(subdirs)):
65 subdir = '/'.join(subdirs[:depth + 1])
66 if fnmatch(node.path, subdir):
71 def walk_repo(self, repo, rev):
72 """Walks through the whole repository and yields all files
73 matching the settings from the config.
74 This method is just used for bootstrapping the cache."""
# do_walk recurses through the tree starting at `path`, looking every node up
# at revision `rev`. `scan` is passed down from the parent directory.
# NOTE(review): the lines that yield/return inside do_walk are not visible
# in this view -- confirm the exact flow against the full file.
75 def do_walk(path, scan):
76 node = repo.get_node(path, rev)
# NOTE(review): `basename` is not used in the lines visible here.
77 basename = posixpath.basename(path)
78 if node.kind == Node.DIRECTORY:
79 # Skip directories (and all of their subdirectories) that are excluded
80 for rule in self.exclude_folders:
81 if fnmatch(node.path, rule):
# Whether this directory itself matches scan_folders; handed to the recursive
# calls as their `scan` argument.
84 do_scan = self.is_path_valid(node.path)
85 for subnode in node.get_entries():
# Only descend into entries accepted by contains_valid_paths.
86 if self.contains_valid_paths(subnode):
87 for result in do_walk(subnode.path, do_scan):
# The node's path is tested against the scan_files patterns.
90 for rule in self.scan_files:
91 if fnmatch(node.path, rule):
# The walk starts at the repository root with scanning enabled.
94 return do_walk('/', True)
96 def load_from_cache(self):
97 """Load the tags from the cache."""
# The cache is the file 'tags' inside self.cachedir.
98 fn = os.path.join(self.cachedir, 'tags')
# NOTE(review): what is returned when the file does not exist, and how `f`
# is opened and closed, is not visible in this view -- confirm.
99 if not os.path.exists(fn):
# NOTE(review): unpickling can execute arbitrary code, so the cache folder
# must only be writable by trusted users.
102 result = pickle.load(f)
106 def get_youngest_rev(self):
107 """Work around a bug in Trac's CachedRepository class which causes
108 Subversion revision 0 to be converted into None."""
109 rev = self.repo.get_youngest_rev()
# If the repository reports None and exposes a `repos` attribute, ask that
# object for the youngest revision instead.
# NOTE(review): the return statement is not visible in this view;
# presumably `rev` is returned -- confirm.
110 if rev is None and hasattr(self.repo, 'repos'):
111 rev = self.repo.repos.get_youngest_rev()
114 def save_to_cache(self, folders):
115 """Saves changes to the cache."""
# Two files are written inside self.cachedir: 'revision' holds the pickled
# youngest revision, 'tags' holds the pickled `folders` object.
# `file(...)` is the Python 2 built-in for opening files.
# NOTE(review): the closing of both file handles is not visible in this
# view -- confirm that they are closed.
116 # update cache revision
117 f = file(os.path.join(self.cachedir, 'revision'), 'wb')
# Dumped with the default pickle protocol.
118 pickle.dump(self.get_youngest_rev(), f)
121 f = file(os.path.join(self.cachedir, 'tags'), 'wb')
# Dumped with pickle protocol 2.
122 pickle.dump(folders, f, 2)
125 def get_cache_revision(self):
126 """Returns the revision of the cache."""
# The revision is stored in the file 'revision' inside self.cachedir (the
# file written by save_to_cache).
127 fn = os.path.join(self.cachedir, 'revision')
# NOTE(review): the rest of this method is not visible in this view.
# get_changed_files treats a None result as "no cache yet", so presumably
# None is returned when the file is missing -- confirm.
128 if not os.path.exists(fn):
135 def get_changed_files(self):
136 """Returns the files which require a rescan from the
137 last cached revision to the current one."""
# NOTE(review): several lines (the initialisation of `rev` and `changes`,
# and what is yielded or returned) are not visible in this view. The caller
# get_new_tags unpacks each result as a (path, revision) pair -- confirm.
138 cur_rev = self.get_youngest_rev()
139 cached_rev = self.get_cache_revision()
140 # special case: scan all files
# No cached revision: every matching file of the current revision is walked.
141 if cached_rev is None:
142 for fn in self.walk_repo(self.repo, cur_rev):
145 # special case: scan no file
# The cached revision is not older than the current one.
146 elif not self.repo.rev_older_than(cached_rev, cur_rev):
148 # otherwise yield changed files
# Step through the revisions one at a time until `rev` is no longer older
# than the current revision, inspecting each changeset.
151 while self.repo.rev_older_than(rev, cur_rev):
152 rev = self.repo.next_rev(rev)
153 cset = self.repo.get_changeset(rev)
154 for path, kind, change, base_path, base_rev in cset.get_changes():
# Directory entries are handled separately from files.
155 if kind == Node.DIRECTORY:
# The file's parent folder is checked against scan_folders.
157 folder = posixpath.dirname(path)
158 if not self.is_path_valid(folder):
# For a move, the source path is recorded as changed too.
160 if change == Changeset.MOVE:
161 changes.add(base_path)
# The changed path is tested against the scan_files patterns.
162 for rule in self.scan_files:
163 if fnmatch(path, rule):
169 def get_new_tags(self):
170 """Parses the text to load the new tags."""
# Builds `files`, a mapping from file path to a list of dicts.
# NOTE(review): the initialisation of `files`, the error handling around
# get_node, the assignment of `content`, the contents of the appended match
# dict and the return statement are not visible in this view -- confirm.
172 for fn, rev in self.get_changed_files():
174 node = self.repo.get_node(fn, rev)
176 # Deal with deleted files by appending an empty file node, to
177 # flush the cache for this file if there's any.
# The placeholder entry carries only a 'path' key and no 'tag' key.
178 files[fn] = [{'path': fn}]
181 f = node.get_content()
# Optionally convert the content with Trac's to_unicode, then split it into
# lines.
183 if (self.enable_unicode):
184 content = to_unicode(content)
185 lines = content.splitlines()
# Not every content object has a close method, hence the hasattr check.
186 if hasattr(f, 'close'):
# Every line is searched with the tag regex built in __init__; hits are
# collected under the node's path.
188 for idx, line in enumerate(lines):
189 m = self.tag_re.search(line)
191 files.setdefault(node.path, []).append({
198 # File was returned by get_changed_files, but no tags were found.
199 # Thus return an empty file node instead (to flush cache for this
200 # file if there's any).
201 if not node.path in files:
202 files[node.path] = [{'path': node.path}]
206 def update_cache(self):
207 """Updates the cache."""
# Merges the freshly scanned tags into the cached mapping and writes it back.
# NOTE(review): the body of the clean-up branch and the return statement are
# not visible in this view. get_taglist uses the result as the path-to-matches
# mapping, so presumably `files` is returned -- confirm.
208 files = self.load_from_cache()
209 # update with new files
210 new_tags = self.get_new_tags()
# `iteritems` is the Python 2 dict iterator.
212 for path, matches in new_tags.iteritems():
# A single entry without a 'tag' key is the placeholder that get_new_tags
# stores for files that have no tags.
213 if len(matches) == 1 and 'tag' not in matches[0]:
214 # Clean up files without tags in them (in the latest revision that is)
# The new matches for a path replace whatever was cached for it.
218 files[path] = matches
219 # Save when things have changed or no previous cache existed
220 if new_tags or files == {}:
221 self.save_to_cache(files)
225 def get_taglist(self):
226 """Returns a list of active tags and updates cache."""
227 files = self.update_cache()
228 # sort folders and create dict for hdf
230 items = files.items()
232 for filepath, matches in items:
233 folders.setdefault(posixpath.dirname(filepath), []).extend(matches)
234 items = folders.items()
237 for path, matches in items:
239 'href': self.env.href.browser(path),
242 'class': 'tag-%s' % m['tag'].lower(),
244 self.env.href.browser(m['path']),
247 'basename': posixpath.basename(m['path']),