# HG changeset patch # User Steve Losh # Date 1286142326 14400 # Node ID 887b6b2b75647c15cc3783f3b9b721179002c249 # Parent 21a4c758ca737df4be61a874ff7aebe194a7a1b4 Remove the prefixcheck file. diff -r 21a4c758ca73 -r 887b6b2b7564 prefixcheck.py --- a/prefixcheck.py Sun Oct 03 17:45:09 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -#!/usr/bin/env python -''' -Test the prefix generating method of t vs my bug program - -Created on Jun 11, 2010 - -@author: Michael Diamond -''' - -import math,random,hashlib,cProfile - -# -# Initial t Prefix -# -def t_prefixes(ids): - """Return a mapping of ids to prefixes. - - Each prefix will be the shortest possible substring of the ID that - can uniquely identify it among the given group of IDs. - - If an ID of one task is entirely a substring of another task's ID, the - entire ID will be the prefix. - - """ - prefixes = {} - for task_id in ids: - others = set(ids).difference([task_id]) - for i in range(1, len(task_id)+1): - prefix = task_id[:i] - if not any(map(lambda o: o.startswith(prefix), others)): - prefixes[task_id] = prefix - break - prefixes[task_id] = task_id - return prefixes - -# -# New t Prefix -# -from t import _prefixes as new_prefixes -# -# Test the prefix methods -# -def _hash(text): - """ Return a hash of the given text for use as a testing id """ - return hashlib.sha1(text.encode('utf-8')).hexdigest() - -def _gen_ids(num, substr=.4): - """ Generates a list of hashes of size num to be used as ids. - substr specifies the percentage of ids that should be substrings - of other ids """ - out = [] - alphabet = 'abcdefghijklmnopqrstuvwxyz' - min = 5 - max = 15 - sub_prefix = int(math.floor(num*substr)) - num -= sub_prefix - for _ in range(num): - string = '' - for x in random.sample(alphabet,random.randint(min,max)): - string+=x - out.append(_hash(string)) - for _ in range(sub_prefix): - str = random.choice(out) - if len(str) < 2: - sub_prefix += 1 - continue - out.append(str[:random.randint(1,len(str)-1)]) - random.shuffle(out) - return out - -def _check_prefixes(t_pre,new_pre): - """ Tests two prefix maps to confirm they are the same """ - for k in t_pre: - if not k in new_pre: - raise Exception(k+" not in new. Should be: "+new_pre[k]) - if t_pre[k] != new_pre[k]: - raise Exception(k+" doesn't match. t: "+t_pre[k]+" - new: "+new_pre[k]) - for k in new_pre: - if not k in t_pre: - raise Exception(k+" not in t. Expected: "+new_pre[k]) - if new_pre[k] != t_pre[k]: - raise Exception(k+" doesn't match. new: "+new_pre[k]+" - t: "+t_pre[k]) - print("No errors found. Both maps are identical.") - -def check_prefixes(size=1000): - ids = _gen_ids(size) - _check_prefixes(t_prefixes(ids),new_prefixes(ids)) - -def speed_check(base=10,max_pow=4,size_limit=5000): - for i in range(max_pow): - size = int(math.pow(base,i)) - print("-----\n\nSize: %s\n" % size) - globals()['ids'] = _gen_ids(size) - #print(ids) - - if size <= size_limit: - print("t_prefixes()") - cProfile.run("t_prefixes(ids)") - else: - print("Too big to run t_prefixes() efficiently. Would take several minutes.\n") - - print("new_prefixes()") - cProfile.run("new_prefixes(ids)") - -check_prefixes() -speed_check()