#!/usr/bin/env python
# -*- coding: utf8 -*-
# ____ ____ __ ____ __ _ ____ __ _ _ ____ __ __ _ ____
# ( __)( _ \( )( __)( ( \( \( ) ( \/ )___( __)( )( ( \( \
# ) _) ) / )( ) _) / / ) D (/ (_/\ ) /(___)) _) )( / / ) D (
# (__) (__\_)(__)(____)\_)__)(____/\____/(__/ (__) (__)\_)__)(____/
#
# The friendlier file finder.
import os
import optparse
import string
import sys
import re
from optparse import OptionParser, OptionGroup
# Constants -------------------------------------------------------------------
# Case-sensitivity modes selectable on the command line.
CASE_SENSITIVE, CASE_INSENSITIVE, CASE_SMART = 1, 2, 3

# Size multipliers; each unit is 1024 times the previous one.
BYTE = 1
KILOBYTE = BYTE * 1024
MEGABYTE = KILOBYTE * 1024
GIGABYTE = MEGABYTE * 1024
TERABYTE = GIGABYTE * 1024
PETABYTE = TERABYTE * 1024

# Directory names that belong to version control systems.
VCS_DIRS = ['.hg', '.git', '.svn']

# The four kinds of filesystem entry that get_type() distinguishes.
TYPE_FILE_REAL, TYPE_FILE_SYMLINK, TYPE_DIR_REAL, TYPE_DIR_SYMLINK = 1, 2, 3, 4

# Singleton sets, one per entry kind...
TYPES_FILE_REAL = set([TYPE_FILE_REAL])
TYPES_FILE_SYMLINK = set([TYPE_FILE_SYMLINK])
TYPES_DIR_REAL = set([TYPE_DIR_REAL])
TYPES_DIR_SYMLINK = set([TYPE_DIR_SYMLINK])

# ...and the unions of them used by the --type letters.
TYPES_FILE = TYPES_FILE_REAL.union(TYPES_FILE_SYMLINK)
TYPES_DIR = TYPES_DIR_REAL.union(TYPES_DIR_SYMLINK)
TYPES_REAL = TYPES_FILE_REAL.union(TYPES_DIR_REAL)
TYPES_SYMLINK = TYPES_FILE_SYMLINK.union(TYPES_DIR_SYMLINK)
TYPES_ALL = TYPES_FILE.union(TYPES_DIR)
# Regexes ---------------------------------------------------------------------
# Matches a size specification: a decimal number (group 1), an optional unit
# letter from b/k/m/g/t/p (group 2, None when absent), then any trailing
# letters, so spellings such as "kb" or "gb" are accepted.  Matching is
# case-insensitive.
SIZE_RE = re.compile(r'^(\d+(?:\.\d+)?)([bkmgtp])?[a-z]*$', re.IGNORECASE)
# Global Options --------------------------------------------------------------
# (it's a prototype, shut up)
# Holds the parsed command-line options.  It is None until main() assigns it;
# match() and search() read it as a module-level global.
options = None
# Output ----------------------------------------------------------------------
def out(s, line_ending='\n'):
    """Write `s` to standard output, terminated by `line_ending`.

    `line_ending` defaults to a newline; search() passes a NUL byte
    instead when --print0 is in effect.
    """
    text = s + line_ending
    sys.stdout.write(text)
def err(s):
    """Write `s` to standard error, followed by a newline."""
    line = s + '\n'
    sys.stderr.write(line)
def die(s, exitcode=1):
    """Report a fatal error on standard error and terminate the process.

    The message is printed as "error: <s>" and the interpreter then exits
    with `exitcode` (1 by default) by raising SystemExit.
    """
    message = 'error: ' + s
    err(message)
    raise SystemExit(exitcode)
# Searching! ------------------------------------------------------------------
def get_type(path):
    """Classify `path` as one of the four TYPE_* constants.

    The result combines two independent facts: whether the path itself is
    a symlink, and whether it is (or resolves to) a directory.  Anything
    that is not a directory is reported as a file.
    """
    is_link = os.path.islink(path)
    is_dir = os.path.isdir(path)

    if is_dir:
        return TYPE_DIR_SYMLINK if is_link else TYPE_DIR_REAL
    return TYPE_FILE_SYMLINK if is_link else TYPE_FILE_REAL
def should_ignore(basename, path):
    """Return True when the entry must be skipped entirely.

    Currently only version-control directories (the names in VCS_DIRS) are
    ignored; `path` is accepted but not consulted.
    """
    return basename in VCS_DIRS
def match(query, path, basename):
    """Decide whether the entry at `path` satisfies the current search.

    All filters are read from the module-level `options`:

    * type      -- the entry's get_type() must be in options.type
    * name      -- `query` must be a substring of `basename`
                   (compared lower-cased when options.case is
                   CASE_INSENSITIVE)
    * size      -- st_size must lie within the inclusive
                   larger_than / smaller_than bounds, when given
    * binary    -- unless options.binary is set, regular files whose first
                   1024 bytes contain a NUL byte are rejected

    Args:
        query: substring to look for in the entry's name.
        path: path of the entry, used for the filesystem checks.
        basename: final path component, used for the name check.

    Returns:
        True if the entry matches; the result is negated when
        options.invert is set.
    """
    def _looks_binary():
        # Only regular files can be sniffed: opening a directory raises, and
        # opening a FIFO or device could block or misbehave.
        if not os.path.isfile(path):
            return False
        try:
            # Binary mode so NUL bytes are seen verbatim and no text decoding
            # error can occur on exactly the files we are trying to detect.
            with open(path, 'rb') as f:
                return b'\0' in f.read(1024)
        except (IOError, OSError):
            # Unreadable: we cannot show that it is binary, so don't reject.
            return False

    def _match():
        if options.type != TYPES_ALL:
            if get_type(path) not in options.type:
                return False

        if options.case == CASE_INSENSITIVE:
            if query.lower() not in basename.lower():
                return False
        elif query not in basename:
            return False

        # Compare against None so that an explicit bound of 0 still filters.
        lower = options.larger_than
        upper = options.smaller_than
        if lower is not None or upper is not None:
            try:
                size = os.stat(path).st_size
            except OSError:
                # E.g. a broken symlink: it has no size to compare.
                return False
            if lower is not None and size < lower:
                return False
            if upper is not None and size > upper:
                return False

        if not options.binary and _looks_binary():
            return False

        return True

    result = _match()
    return not result if options.invert else result
def search(query, dir='.', depth=0):
    """Recursively print every entry under `dir` that matches `query`.

    Each entry that is not ignored by should_ignore() is tested with
    match() and, if it matches, written with out() (NUL-terminated when
    options.zero is set).  Non-ignored subdirectories are then searched in
    turn while `depth` is below options.depth; symlinked directories are
    only descended into when options.follow is set.

    A directory that cannot be listed (permission denied, removed while the
    search is running, ...) is reported on stderr and skipped instead of
    aborting the whole search.

    Args:
        query: substring to search for, passed through to match().
        dir: directory to list.
        depth: how many levels below the starting directory `dir` is.
    """
    try:
        contents = os.listdir(dir)
    except OSError:
        err('warning: could not read directory "%s"' % dir)
        return

    terminator = '\0' if options.zero else '\n'
    subdirs = []

    for item in contents:
        path = os.path.join(dir, item)
        if should_ignore(item, path):
            # Ignored entries are neither reported nor descended into.
            continue

        if match(query, path, item):
            out(path, terminator)

        if os.path.isdir(path):
            if options.follow or not os.path.islink(path):
                subdirs.append(path)

    if depth < options.depth:
        for subdir in subdirs:
            search(query, subdir, depth + 1)
# Option Parsing and Main -----------------------------------------------------
def build_option_parser():
    """Construct the optparse parser that describes every command-line flag.

    Nothing is parsed here.  The parser has a handful of ungrouped main
    options followed by one option group each for case sensitivity,
    ignoring, time filtering, size filtering and type filtering.

    Returns:
        The fully configured OptionParser.
    """
    parser = OptionParser("usage: %prog [options] PATTERN")

    # Main options
    parser.add_option(
        '-d', '--dir',
        metavar='DIR',
        default='.',
        help='root the search in DIR (default .)')
    parser.add_option(
        '-D', '--depth',
        metavar='N',
        default='25',
        help='search at most N directories deep (default 25)')
    parser.add_option(
        '-f', '--follow',
        action='store_true', default=False,
        help='follow symlinked directories and search their contents')
    parser.add_option(
        '-F', '--no-follow',
        dest='follow', action='store_false',
        help="don't follow symlinked directories (default)")
    parser.add_option(
        '-0', '--print0',
        dest='zero', action='store_true', default=False,
        help='separate matches with a null byte in output')
    parser.add_option(
        '-l', '--literal',
        action='store_true', default=False,
        help='force literal search, even if it looks like a regex')
    parser.add_option(
        '-v', '--invert',
        action='store_true', default=False,
        help='invert match')

    # Case sensitivity: three flags sharing the single `case` destination.
    case_group = OptionGroup(parser, "Configuring Case Sensitivity")
    case_group.add_option(
        '-s', '--case-sensitive',
        dest='case', action='store_const', const=CASE_SENSITIVE,
        default=CASE_SENSITIVE,
        help='case sensitive matching (default)')
    case_group.add_option(
        '-i', '--case-insensitive',
        dest='case', action='store_const', const=CASE_INSENSITIVE,
        help='case insensitive matching')
    case_group.add_option(
        '-S', '--case-smart',
        dest='case', action='store_const', const=CASE_SMART,
        help='smart case matching (sensitive if any uppercase chars '
             'are in the pattern, insensitive otherwise)')
    parser.add_option_group(case_group)

    # Ignoring
    ignore_group = OptionGroup(parser, "Configuring Ignoring")
    ignore_group.add_option(
        '-b', '--binary',
        dest='binary', action='store_true', default=True,
        help="allow binary files (default)")
    ignore_group.add_option(
        '-B', '--no-binary',
        dest='binary', action='store_false',
        help='ignore binary files')
    ignore_group.add_option(
        '-r', '--restricted',
        action='store_true', default=False,
        help="restricted search (skip VCS directories, "
             "parse all ignore files) (default)")
    ignore_group.add_option(
        '-q', '--semi-restricted',
        action='store_true', default=False,
        help="semi-restricted search (don't parse VCS ignore files, "
             "but still skip VCS directories and parse .ffignore)")
    ignore_group.add_option(
        '-u', '--unrestricted',
        action='store_true', default=False,
        help="unrestricted search (don't parse ignore files, but "
             "still skip VCS directories)")
    ignore_group.add_option(
        '-a', '--all',
        action='store_true', default=False,
        help="don't ignore anything (ALL files can match)")
    ignore_group.add_option(
        '-I', '--ignore',
        metavar='PATTERN', action='append',
        help="add a pattern to be ignored (can be given multiple times)")
    parser.add_option_group(ignore_group)

    # Time filtering: modification-time flags first, then creation-time.
    time_group = OptionGroup(parser, "Time Filtering")
    time_group.add_option(
        '--before', metavar='TIME',
        help='match files modified < TIME')
    time_group.add_option(
        '--after', metavar='TIME',
        help='match files modified > TIME')
    time_group.add_option(
        '--until', metavar='TIME',
        help='match files modified <= TIME')
    time_group.add_option(
        '--since', metavar='TIME',
        help='match files modified >= TIME')
    time_group.add_option(
        '--at', metavar='TIME',
        help='match files modified at TIME')
    time_group.add_option(
        '--created-before', metavar='TIME',
        help='match files created < TIME')
    time_group.add_option(
        '--created-after', metavar='TIME',
        help='match files created > TIME')
    time_group.add_option(
        '--created-until', metavar='TIME',
        help='match files created <= TIME')
    time_group.add_option(
        '--created-since', metavar='TIME',
        help='match files created >= TIME')
    time_group.add_option(
        '--created-at', metavar='TIME',
        help='match files created at TIME')
    parser.add_option_group(time_group)

    # Size filtering
    size_description = (
        "Sizes can be given as a number followed by a prefix.  Some examples: "
        "1k, 5kb, 1.5gb, 2g, 1024b")
    size_group = OptionGroup(parser, "Size Filtering", size_description)
    size_group.add_option(
        '--larger-than', metavar='SIZE',
        help='match files larger than SIZE (inclusive)')
    # Hidden alias that stores into the same destination as --larger-than.
    size_group.add_option(
        '--bigger-than', dest='larger_than',
        help=optparse.SUPPRESS_HELP)
    size_group.add_option(
        '--smaller-than', metavar='SIZE',
        help='match files smaller than SIZE (inclusive)')
    parser.add_option_group(size_group)

    # Type filtering
    type_description = (
        "Possible types are "
        "a (all), "
        "f (files), "
        "d (dirs), "
        "r (real), "
        "s (symlinked), "
        "e (real files), "
        "c (real dirs), "
        "x (symlinked files), "
        "y (symlinked dirs).  "
        "If multiple types are given they will be unioned together:  "
        "--type 'es' would match real files and all symlinks.")
    type_group = OptionGroup(parser, "Type Filtering", type_description)
    type_group.add_option(
        '-t', '--type',
        action='store', default=False, metavar='TYPE(S)',
        help='match only specific types of things (files, dirs, non-symlinks, symlinks)')
    parser.add_option_group(type_group)

    return parser
def build_type_set(types):
    """Translate a --type string into the set of TYPE_* values it selects.

    Each character of `types` names one of the TYPES_* sets (letters are
    case-insensitive); the sets for all characters are unioned together.

    Args:
        types: the raw --type value, or a false value when the option was
            not given.

    Returns:
        TYPES_ALL when `types` is empty/false, otherwise a new set holding
        the union of the selected types.

    Exits via die() when `types` contains a letter that is not a known type.
    """
    if not types:
        return TYPES_ALL

    # Built once per call rather than once per character.
    sets_by_letter = {
        'a': TYPES_ALL,

        'e': TYPES_FILE_REAL,
        'x': TYPES_FILE_SYMLINK,
        'c': TYPES_DIR_REAL,
        'y': TYPES_DIR_SYMLINK,

        'f': TYPES_FILE,
        'd': TYPES_DIR,

        'r': TYPES_REAL,
        's': TYPES_SYMLINK,
    }

    result = set()
    for c in types:
        selected = sets_by_letter.get(c.lower())
        if selected is None:
            # Give a readable message instead of a KeyError traceback.
            die('invalid type "%s"' % c)
        result = result | selected

    return result
def parse_size(size):
    """Convert a human-readable size such as "1.5gb" into a byte count.

    Spaces are stripped first.  The text must then match SIZE_RE: a decimal
    number optionally followed by a unit letter (b, k, m, g, t or p, in
    either case) and any further letters.  A missing unit means bytes.

    Args:
        size: the raw option value, or None/'' when the option is absent.

    Returns:
        The size in bytes as an int, or None when `size` is empty.

    Exits via die() when the text is not a valid size.
    """
    size = size.replace(' ', '') if size else size

    if not size:
        return None

    m = SIZE_RE.match(size)

    if not m:
        die('invalid size "%s"' % size)

    n, unit = m.groups()

    try:
        n = float(n)
    except ValueError:
        die('invalid size "%s"' % size)

    multipliers = {
        'b': BYTE,
        'k': KILOBYTE,
        'm': MEGABYTE,
        'g': GIGABYTE,
        't': TERABYTE,
        'p': PETABYTE,
    }
    # SIZE_RE is case-insensitive, so the captured unit may be uppercase;
    # normalise it before the lookup or "5KB" would raise KeyError.
    multiplier = multipliers[(unit or 'b').lower()]

    return int(n * multiplier)
def main():
    """Parse the command line, normalise the options and run the search.

    The parsed options are stored in the module-level `options` so that
    match() and search() can read them.  Before searching, this function:

    * rejects more than one PATTERN argument (no argument means an empty
      query),
    * changes into the --dir directory,
    * converts --depth to a non-negative int,
    * resolves smart case to either sensitive or insensitive,
    * converts --type to a set of types and the size options to byte counts,
    * drops directory types whenever a size bound is given.

    Any invalid input terminates the program through die().
    """
    global options

    (options, args) = build_option_parser().parse_args()

    # PATTERN
    if len(args) > 1:
        die("only one search pattern can be given")

    query = args[0] if args else ''

    # --dir
    if options.dir:
        try:
            os.chdir(options.dir)
        except OSError:
            die('could not change to directory "%s"' % options.dir)

    # --depth
    try:
        depth = int(options.depth)
        if depth < 0:
            # The error message promises non-negative, so enforce it.
            raise ValueError(options.depth)
    except ValueError:
        die('depth must be a non-negative integer (got "%s")' % options.depth)
    options.depth = depth

    # --case-*
    if options.case == CASE_SMART:
        # str.isupper() works on both Python 2 and 3, unlike string.uppercase.
        if any(c.isupper() for c in query):
            options.case = CASE_SENSITIVE
        else:
            options.case = CASE_INSENSITIVE

    # --type
    options.type = build_type_set(options.type)

    # --larger-than, --smaller-than
    options.larger_than = parse_size(options.larger_than)
    options.smaller_than = parse_size(options.smaller_than)

    # Compare against None so that an explicit bound of 0 also counts.
    if options.larger_than is not None or options.smaller_than is not None:
        # Directory sizes are not supported.
        options.type = options.type - TYPES_DIR

    # Go!
    search(query)
# Run the search only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()