hg.stevelosh.com > t

--- a/prefixcheck.py	Sun Oct 03 17:45:09 2010 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-'''
-Test the prefix generating method of t vs my bug program
-
-Created on Jun 11, 2010
-
-@author: Michael Diamond
-'''
-
-import math,random,hashlib,cProfile
-
-#
-# Initial t Prefix
-#
-def t_prefixes(ids):
-    """Return a mapping of ids to prefixes.
-
-    Each prefix will be the shortest possible substring of the ID that
-    can uniquely identify it among the given group of IDs.
-
-    If an ID of one task is entirely a substring of another task's ID, the
-    entire ID will be the prefix.
-
-    """
-    prefixes = {}
-    for task_id in ids:
-        others = set(ids).difference([task_id])
-        for i in range(1, len(task_id)+1):
-            prefix = task_id[:i]
-            if not any(map(lambda o: o.startswith(prefix), others)):
-                prefixes[task_id] = prefix
-                break
-            prefixes[task_id] = task_id
-    return prefixes
-
-#
-# New t Prefix
-#
-from t import _prefixes as new_prefixes
-#
-# Test the prefix methods
-#
-def _hash(text):
-    """ Return a hash of the given text for use as a testing id """
-    return hashlib.sha1(text.encode('utf-8')).hexdigest()
-
-def _gen_ids(num, substr=.4):
-    """ Generates a list of hashes of size num to be used as ids.
-    substr specifies the percentage of ids that should be substrings
-    of other ids """
-    out = []
-    alphabet = 'abcdefghijklmnopqrstuvwxyz'
-    min = 5
-    max = 15
-    sub_prefix = int(math.floor(num*substr))
-    num -= sub_prefix
-    for _ in range(num):
-        string = ''
-        for x in random.sample(alphabet,random.randint(min,max)):
-            string+=x
-        out.append(_hash(string))
-    for _ in range(sub_prefix):
-        str = random.choice(out)
-        if len(str) < 2:
-            sub_prefix += 1
-            continue
-        out.append(str[:random.randint(1,len(str)-1)])
-    random.shuffle(out)
-    return out
-
-def _check_prefixes(t_pre,new_pre):
-    """ Tests two prefix maps to confirm they are the same """
-    for k in t_pre:
-        if not k in new_pre:
-            raise Exception(k+" not in new.  Should be: "+new_pre[k])
-        if t_pre[k] != new_pre[k]:
-            raise Exception(k+" doesn't match.  t: "+t_pre[k]+" - new: "+new_pre[k])
-    for k in new_pre:
-        if not k in t_pre:
-            raise Exception(k+" not in t.  Expected: "+new_pre[k])
-        if new_pre[k] != t_pre[k]:
-            raise Exception(k+" doesn't match.  new: "+new_pre[k]+" - t: "+t_pre[k])
-    print("No errors found.  Both maps are identical.")
-
-def check_prefixes(size=1000):
-    ids = _gen_ids(size)
-    _check_prefixes(t_prefixes(ids),new_prefixes(ids))
-
-def speed_check(base=10,max_pow=4,size_limit=5000):
-    for i in range(max_pow):
-        size = int(math.pow(base,i))
-        print("-----\n\nSize: %s\n" % size)
-        globals()['ids'] = _gen_ids(size)
-        #print(ids)
-
-        if size <= size_limit:
-            print("t_prefixes()")
-            cProfile.run("t_prefixes(ids)")
-        else:
-            print("Too big to run t_prefixes() efficiently.  Would take several minutes.\n")
-
-        print("new_prefixes()")
-        cProfile.run("new_prefixes(ids)")
-
-check_prefixes()
-speed_check()
author	Steve Losh <steve@dwaiter.com>
date	Sun, 03 Oct 2010 17:45:26 -0400
parents	21a4c758ca73
children	52540acf4f80
branches/tags	(none)
files	prefixcheck.py