
import random
import sys
import time

# All-ones value one bit wider than sys.maxint (2 * sys.maxint + 1).
# Not referenced by the rest of the visible script.
MAXINT = (sys.maxint << 1) + 1
# Get reproducible results
random.seed(1125604952.573868)

# Number of random elements per data set: the first command-line argument,
# or 10000 when none is given.  A plain if/else is used rather than the
# ``cond and x or y`` idiom, which silently replaced an explicit 0 with the
# default because 0 is falsy.
if len(sys.argv) > 1:
    N = int(sys.argv[1])
else:
    N = 10000
# Element budget per measurement: time_set_func divides this by the total
# input size to decide how many times to repeat the timed call.
base_ops = 1e6


def time_set_func(func, *sets):
    """Return the average wall-clock seconds taken by one ``func(*sets)`` call.

    The call is repeated about ``base_ops / total`` times, where ``total`` is
    the summed length of ``sets``, so that every measurement processes
    roughly ``base_ops`` elements regardless of the input size.

    The call is always made at least once.  Inputs holding more than
    ``base_ops`` elements in total would otherwise yield a repeat count of
    zero, and inputs with no elements a zero divisor; both would end in a
    ZeroDivisionError.
    """
    from itertools import repeat

    len_sum = sum([len(s) for s in sets])
    # Clamp the divisor (empty inputs) and the quotient (huge inputs) to 1.
    repeats = max(1, int(base_ops * 1.0 / max(len_sum, 1)))
    # Bind the clock to a local so the lookups stay out of the timed region.
    _t = time.time
    t1 = _t()
    # repeat(None, n) drives the loop without creating a counter object per
    # iteration, keeping loop overhead in the measurement small.
    for _ in repeat(None, repeats):
        func(*sets)
    t2 = _t()
    return (t2 - t1) / repeats

def test_multiple_sets(func, l, *args):
    """Time ``func`` on every named data set and print one line per set.

    ``l`` is a sequence of ``(name, items)`` pairs.  Pairs whose ``items`` is
    empty or None are skipped.  Each remaining ``items`` is converted to a
    set and measured with ``time_set_func(func, that_set, *args)``; the
    result is printed as ``<name>:  <seconds> s.`` with the names padded to
    a common column.  A blank line is printed after the last entry.

    Returns the list of measured per-call times in seconds, in the order of
    ``l``, without entries for the skipped pairs.  An empty ``l`` prints only
    the blank line and returns an empty list.
    """
    # Width of the label column; ``or [0]`` keeps max() from raising
    # ValueError when ``l`` is empty.
    result_col = max([len(name) for name, _ in l] or [0]) + 2
    out = sys.stdout
    res = []
    for name, s in l:
        if not s:
            continue
        s = set(s)
        out.write(("%s:" % name).ljust(result_col))
        # Make the label visible before the (possibly slow) measurement,
        # even when stdout is block-buffered.
        out.flush()
        dt = time_set_func(func, s, *args)
        # The leading space is the separator the print statement used to
        # emit between the label and the timing.
        out.write(" %g s.\n" % dt)
        res.append(dt)
    out.write("\n")
    return res


# Benchmark inputs.  The random module is seeded above, so the order of the
# random calls in this section determines the generated data; keep it stable.

# N random integers in [0, sys.maxint); duplicates are improbable.
no_dups = [random.randrange(0, sys.maxint) for i in xrange(N)]
# The same values with every element occurring 20 times.
many_dups = 20 * no_dups
# Low two bits cleared, so every value is a multiple of 4 -- presumably meant
# to pile entries into the same hash slots; confirm against the set
# implementation being measured.
many_collisions = [int((i & ~3) & sys.maxint) for i in no_dups]

try:
    f = open('/usr/share/dict/words')
except IOError:
    # No system word list: the word-based benchmarks below are skipped.
    words = None
else:
    try:
        # Avoid the string-set special case in CPython by using unicode
        # objects.  Decode explicitly: the implicit ASCII conversion done by
        # unicode(s) raises UnicodeDecodeError on word lists that contain
        # non-ASCII entries.  ASCII-only lines decode to the same values.
        all_words = [s.decode('utf-8', 'replace') for s in f]
    finally:
        # Release the handle even if reading or decoding fails.
        f.close()
    # random.sample() raises ValueError when asked for more items than the
    # population holds, so cap the sample size at the word count.
    words = random.sample(all_words, min(N, len(all_words)))

# (label, data) pairs handed to test_multiple_sets.
l = [
    ("no dups", no_dups),
    ("many dups", many_dups),
    ("many collisions", many_collisions),
#     ("words", words),
]

# Single-operand benchmarks: each lambda copies its input into a fresh set,
# so the mutating operations never alter the shared data.
print("__init__")
test_multiple_sets(lambda a: set(a), l)
print("update with itself")
test_multiple_sets(lambda a: set(a).update(a), l)
print("difference with itself")
test_multiple_sets(lambda a: set(a).difference_update(a), l)

# Two-operand benchmarks against the word set; skipped when no words were
# loaded (missing or empty word list).
if words:
    print("update with words")
    test_multiple_sets(lambda a, b: set(a).update(b), l, set(words))
    print("difference with words")
    test_multiple_sets(lambda a, b: set(a).difference_update(b), l, set(words))
