Style changes.
author |
Steve Losh <steve@dwaiter.com> |
date |
Sun, 03 Oct 2010 17:45:09 -0400 |
parents |
9d004bdf5b1d |
children |
(none) |
#!/usr/bin/env python
'''
Test the prefix generating method of t vs my bug program
Created on Jun 11, 2010
@author: Michael Diamond
'''
import math,random,hashlib,cProfile
#
# Initial t Prefix
#
def t_prefixes(ids):
"""Return a mapping of ids to prefixes.
Each prefix will be the shortest possible substring of the ID that
can uniquely identify it among the given group of IDs.
If an ID of one task is entirely a substring of another task's ID, the
entire ID will be the prefix.
"""
prefixes = {}
for task_id in ids:
others = set(ids).difference([task_id])
for i in range(1, len(task_id)+1):
prefix = task_id[:i]
if not any(map(lambda o: o.startswith(prefix), others)):
prefixes[task_id] = prefix
break
prefixes[task_id] = task_id
return prefixes
#
# New t Prefix
#
from t import _prefixes as new_prefixes
#
# Test the prefix methods
#
def _hash(text):
""" Return a hash of the given text for use as a testing id """
return hashlib.sha1(text.encode('utf-8')).hexdigest()
def _gen_ids(num, substr=.4):
""" Generates a list of hashes of size num to be used as ids.
substr specifies the percentage of ids that should be substrings
of other ids """
out = []
alphabet = 'abcdefghijklmnopqrstuvwxyz'
min = 5
max = 15
sub_prefix = int(math.floor(num*substr))
num -= sub_prefix
for _ in range(num):
string = ''
for x in random.sample(alphabet,random.randint(min,max)):
string+=x
out.append(_hash(string))
for _ in range(sub_prefix):
str = random.choice(out)
if len(str) < 2:
sub_prefix += 1
continue
out.append(str[:random.randint(1,len(str)-1)])
random.shuffle(out)
return out
def _check_prefixes(t_pre,new_pre):
""" Tests two prefix maps to confirm they are the same """
for k in t_pre:
if not k in new_pre:
raise Exception(k+" not in new. Should be: "+new_pre[k])
if t_pre[k] != new_pre[k]:
raise Exception(k+" doesn't match. t: "+t_pre[k]+" - new: "+new_pre[k])
for k in new_pre:
if not k in t_pre:
raise Exception(k+" not in t. Expected: "+new_pre[k])
if new_pre[k] != t_pre[k]:
raise Exception(k+" doesn't match. new: "+new_pre[k]+" - t: "+t_pre[k])
print("No errors found. Both maps are identical.")
def check_prefixes(size=1000):
ids = _gen_ids(size)
_check_prefixes(t_prefixes(ids),new_prefixes(ids))
def speed_check(base=10,max_pow=4,size_limit=5000):
for i in range(max_pow):
size = int(math.pow(base,i))
print("-----\n\nSize: %s\n" % size)
globals()['ids'] = _gen_ids(size)
#print(ids)
if size <= size_limit:
print("t_prefixes()")
cProfile.run("t_prefixes(ids)")
else:
print("Too big to run t_prefixes() efficiently. Would take several minutes.\n")
print("new_prefixes()")
cProfile.run("new_prefixes(ids)")
check_prefixes()
speed_check()