--- a/ffind Thu Sep 27 00:00:33 2012 -0400
+++ b/ffind Thu Sep 27 13:22:41 2012 -0400
@@ -132,64 +132,105 @@
return lambda s: l in s
def compile_git(line):
+ original_line = line
pat = ''
- # The following comments are (mostly) from gitignore(5).
-
- # If the pattern ends with a slash, it is removed for the purpose of the
- # following description, but it would only find a match with a directory. In
- # other words, foo/ will match a directory foo and paths underneath it, but
- # will not match a regular file or a symbolic link foo (this is consistent
- # with the way how pathspec works in general in git).
- # directories_only = line.endswith('/')
+ # From the gitignore(5) man page:
+ # If the pattern ends with a slash, it is removed for the purpose of the
+ # following description, but it would only find a match with
+ # a directory. In other words, foo/ will match a directory foo and paths
+ # underneath it, but will not match a regular file or a symbolic link
+ # foo (this is consistent with the way how pathspec works in general in
+ # git).
+ #
+ # A leading slash matches the beginning of the pathname. For example,
+ # "/*.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c".
+ #
+ # If the pattern does not contain a slash /, git treats it as a shell
+ # glob pattern and checks for a match against the pathname relative to
+ # the location of the .gitignore file (relative to the toplevel of the
+ # work tree if not from a .gitignore file).
+ #
+ # Otherwise, git treats the pattern as a shell glob suitable for
+ # consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the
+ # pattern will not match a / in the pathname. For example,
+ # "Documentation/*.html" matches "Documentation/git.html" but not
+ # "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html".
+ #
+ # If you can't tell what the hell this means you're not alone, because git's
+ # documentation is fucking inscrutable. Here's what I've come up with from
+ # trial and error:
+ #
+ # 0. Patterns ending in a slash will only match directories, and then you
+ # can ignore that slash for the rest of these rules.
+ # 1. Patterns are shell globs, except * doesn't match / and there's no **.
+ # 2. Patterns without a slash search the basename of the path, for example:
+ # the 'file.txt' in '/foo/bar/file.txt'.
+ # 3. Patterns with a slash search against the entire path.
+ # 4. A pattern must match the entire string it's searched against. For example:
+ #
+ # 'am' will not ignore '/foo/bar/spam'
+ # it matches against the basename 'spam' but does not match all of it
+ #
+ # 'bar/spam' will not ignore '/foo/bar/spam'
+ # it matches against the full path (because it has a slash) but does not
+ # match all of it.
+ # 5. A leading slash doesn't affect the matching, but does turn a
+ # "pattern with no slash" into a "pattern with a slash". So:
+ #
+ # 'bar' will ignore '/foo/bar/spam' (actually it'll ignore bar entirely)
+ # it matches against the basename 'bar' (because there's no slash) when
+ # at that level
+ #
+ # '/bar' will not ignore '/foo/bar/spam'
+ # it matches against the entire path '/foo/bar' (because there is
+ # a slash) when at that level
- # A leading slash matches the beginning of the pathname. For example, "/*.c"
- # matches "cat-file.c" but not "mozilla-sha1/sha1.c".
- if line.startswith('/'):
+ if line.endswith('/'):
+ # TODO: Make trailing-slash patterns match directories only.
+ # directories_only = True
+ line = line[:-1]
+
+ has_slash = '/' in line
+
+ line = line.lstrip('/')
+
+ if has_slash:
+ # Patterns with a slash have to match against the entire pathname. So
+ # they need to be rooted at the beginning.
pat += '^./'
- line = line[1:]
+ else:
+ # Patterns without a slash match against just the basename, which we'll
+ # simulate by including the (final) divider in the pattern.
+ pat += '/'
- def _eat_glob(chs):
- pat = ''
- while chs:
+ # The rest of the pattern is git's variation on shell globs.
+ # Mostly normal shell globs, but there's no **.
+ chs = list(line)
+ while chs:
+ ch = chs.pop(0)
+ if ch == '?':
+ pat += '.'
+ elif ch == '*':
+ pat += '[^/]*'
+ elif ch == '[':
+ pat += '['
ch = chs.pop(0)
- if ch == '?':
- pat += '.'
- elif ch == '*':
- pat += '[^/]*'
- elif ch == '[':
- pat += '['
+ while chs and ch != ']':
+ pat += ch
ch = chs.pop(0)
- while chs and ch != ']':
- pat += ch
- ch = chs.pop(0)
- pat += ']'
- else:
- pat += re.escape(ch)
- return pat
+ pat += ']'
+ else:
+ pat += re.escape(ch)
- chs = list(line)
- # I can't tell what the difference is between these two cases because git's
- # documentation is fucking inscrutable.
- if '/' not in line:
- # If the pattern does not contain a slash /, git treats it as a shell
- # glob pattern and checks for a match against the pathname relative to
- # the location of the .gitignore file (relative to the toplevel of the
- # work tree if not from a .gitignore file).
- pat += _eat_glob(chs)
- else:
- # Otherwise, git treats the pattern as a shell glob suitable for
- # consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the
- # pattern will not match a / in the pathname. For example,
- # "Documentation/*.html" matches "Documentation/git.html" but not
- # "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html".
- pat += _eat_glob(chs)
+ # Patterns always have to be anchored at the end.
+ pat += '$'
try:
regex = re.compile(pat)
return lambda s: regex.search(s)
except:
- warn("could not parse gitignore pattern '%s'" % line)
+ warn("could not parse gitignore pattern '%s'" % original_line)
return lambda s: True