# HG changeset patch # User Steve Losh # Date 1348766561 14400 # Node ID 9626235f7fe1ef677f964eb0d395702be73e9598 # Parent cf6c3851d091fcefa07977f59c5a91f9918b4403 Second try at git's hilariously-documented ignore format. diff -r cf6c3851d091 -r 9626235f7fe1 ffind --- a/ffind Thu Sep 27 00:00:33 2012 -0400 +++ b/ffind Thu Sep 27 13:22:41 2012 -0400 @@ -132,64 +132,105 @@ return lambda s: l in s def compile_git(line): + original_line = line pat = '' - # The following comments are (mostly) from gitignore(5). - - # If the pattern ends with a slash, it is removed for the purpose of the - # following description, but it would only find a match with a directory. In - # other words, foo/ will match a directory foo and paths underneath it, but - # will not match a regular file or a symbolic link foo (this is consistent - # with the way how pathspec works in general in git). - # directories_only = line.endswith('/') + # From man gitignore 5: + # If the pattern ends with a slash, it is removed for the purpose of the + # following description, but it would only find a match with + # a directory. In other words, foo/ will match a directory foo and paths + # underneath it, but will not match a regular file or a symbolic link + # foo (this is consistent with the way how pathspec works in general in + # git). + # + # A leading slash matches the beginning of the pathname. For example, + # "/*.c" matches "cat-file.c" but not "mozilla-sha1/sha1.c". + # + # If the pattern does not contain a slash /, git treats it as a shell + # glob pattern and checks for a match against the pathname relative to + # the location of the .gitignore file (relative to the toplevel of the + # work tree if not from a .gitignore file). + # + # Otherwise, git treats the pattern as a shell glob suitable for + # consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the + # pattern will not match a / in the pathname. 
For example, + # "Documentation/*.html" matches "Documentation/git.html" but not + # "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html". + # + # If you can't tell what the hell this means you're not alone, because git's + # documentation is fucking inscrutable. Here's what I've come up with from + # trial and error: + # + # 0. Patterns ending in a slash will only match directories, and then you + # can ignore that slash for the rest of these rules. + # 1. Patterns are shell globs, except * doesn't match / and there's no **. + # 2. Patterns without a slash search the basename of the path, for example: + # the 'file.txt' in '/foo/bar/file.txt'. + # 3. Patterns with a slash search against the entire path. + # 4. All matching must match the entire string it's searching. For example: + # + # 'am' will not ignore '/foo/bar/spam' + # it matches against the basename 'spam' but does not match all of it + # + # 'bar/spam' will not ignore '/foo/bar/spam' + # it matches against the full path (because it has a slash) but does not + # match all of it. + # 5. A leading slash doesn't affect the matching, but does turn a + # "pattern with no slash" into a "pattern with a slash". So: + # + # 'bar' will ignore '/foo/bar/spam' (actually it'll ignore bar entirely) + # it matches against the basename 'bar' (because there's no slash) when + # at that level + # + # '/bar' will not ignore '/foo/bar/spam' + # it matches against the entire path '/foo/bar' (because there is + # a slash) when at that level - # A leading slash matches the beginning of the pathname. For example, "/*.c" - # matches "cat-file.c" but not "mozilla-sha1/sha1.c". - if line.startswith('/'): + if line.endswith('/'): + # TODO: Deal with this. + # directories_only = True + line = line[:-1] + + has_slash = '/' in line + + line = line.lstrip('/') + + if has_slash: + # Patterns with a slash have to match against the entire pathname. So + # they need to be rooted at the beginning. 
pat += '^./' - line = line[1:] + else: + # Patterns without a slash match against just the basename, which we'll + # simulate by including the (final) divider in the pattern. + pat += '/' - def _eat_glob(chs): - pat = '' - while chs: + # The rest of the pattern is git's variation on shell globs. + # Mostly normal shell globs, but there's no **. + chs = list(line) + while chs: + ch = chs.pop(0) + if ch == '?': + pat += '.' + elif ch == '*': + pat += '[^/]*' + elif ch == '[': + pat += '[' ch = chs.pop(0) - if ch == '?': - pat += '.' - elif ch == '*': - pat += '[^/]*' - elif ch == '[': - pat += '[' + while chs and ch != ']': + pat += ch ch = chs.pop(0) - while chs and ch != ']': - pat += ch - ch = chs.pop(0) - pat += ']' - else: - pat += re.escape(ch) - return pat + pat += ']' + else: + pat += re.escape(ch) - chs = list(line) - # I can't tell what the difference is between these two cases because git's - # documentation is fucking inscrutable. - if '/' not in line: - # If the pattern does not contain a slash /, git treats it as a shell - # glob pattern and checks for a match against the pathname relative to - # the location of the .gitignore file (relative to the toplevel of the - # work tree if not from a .gitignore file). - pat += _eat_glob(chs) - else: - # Otherwise, git treats the pattern as a shell glob suitable for - # consumption by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the - # pattern will not match a / in the pathname. For example, - # "Documentation/*.html" matches "Documentation/git.html" but not - # "Documentation/ppc/ppc.html" or "tools/perf/Documentation/perf.html". - pat += _eat_glob(chs) + # Patterns always have to be anchored at the end. + pat += '$' try: regex = re.compile(pat) return lambda s: regex.search(s) except: - warn("could not parse gitignore pattern '%s'" % line) + warn("could not parse gitignore pattern '%s'" % original_line) return lambda s: True