#!/usr/bin/python

import sys
MAXINT = (sys.maxint << 1) + 1
# Get reproducible results
import random
random.seed(1125604952.573868)

# import psyco
# psyco.full()


Dummy = object()
Empty = object()

class plain_set(object):
    # Knobs
    max_fill = 2.0 / 3
#     max_fill = 1.0 / 2
    grow_ratio = lambda self: self.used > 50000 and 2 or 4
    initial_size = 8
    perturb_shift = 5
    perturb_before = False
#     perturb_before = True

    def __init__(self):
        # Nb active slots
        self.used = 0
        # Nb active + dummy slots
        self.fill = 0
        # Current size - 1
        self.mask = self.initial_size - 1

        # Table of hash entries: we only store
        # hash values, making things simpler
        self.table = [Empty] * (self.mask + 1)
        # The real set ;)
        self._set = set()

        # ** Stats **
        # Number of inserts
        self.inserts = 0
        # Number of lookups
        self.lookups = 0
        self.resizing_lookups = 0
        # Number of collisions
        self.collisions = 0
        # Number of table walks
        self.walks = 0
        self.resizing_walks = 0
        # Number of dummies skipped
        self.skips = 0
        # Number of "hard" collisions
        # (i.e. using full unmasked hash values)
        self.hard_collisions = 0

    def _found_hard_collision(self, key, hash_):
        self.hard_collisions += 1
        print "hard collision with key '%s' hashed to '%d'" % (key, hash_)

    def _lookup(self, key, hash_, resizing=False):
        """
        Lookup key and return index of entry in table.
        """
        assert self.fill <= self.mask, "Table is full!"
        if not resizing:
            self.lookups += 1
        else:
            self.resizing_lookups += 1
        i = hash_ & self.mask
        v = self.table[i]
        if not resizing:
            self.walks += 1
        else:
            self.resizing_walks += 1
        if v is Empty:
            return i
        if v == hash_:
            return i
        if v is Dummy:
            freeslot = i
            assert not resizing
            self.skips += 1
        else:
            freeslot = None
        if not resizing:
            self.collisions += 1
        # Ensure unsigned calculation (logical rshift)
        perturb = hash_ & MAXINT
        if self.perturb_before:
            perturb >>= self.perturb_shift
        while 1:
            i = (i * 5 + perturb + 1) & MAXINT
            index = i & self.mask
            v = self.table[index]
            if not resizing:
                self.walks += 1
            else:
                self.resizing_walks += 1
            if v is Empty:
                if freeslot is not None:
                    return freeslot
                else:
                    return index
            if v == hash_:
                return index
            if v is Dummy:
                if freeslot is None:
                    freeslot = index
                assert not resizing
                self.skips += 1
            perturb >>= self.perturb_shift

    def _resize(self, minsize):
        size = self.initial_size
        while size <= minsize:
            size *= 2
        old_table = self.table
        self.table = [Empty] * size
        self.used = 0
        self.fill = 0
        self.mask = size - 1
        for v in old_table:
            if v is Empty or v is Dummy:
                continue
            i = self._lookup(None, v, resizing=True)
            self.table[i] = v
            self.used += 1
            self.fill += 1

    def _grow(self):
        """
        Grow table if necessary
        """
        if self.fill < self.max_fill * (self.mask + 1):
            return
        minsize = self.used * self.grow_ratio()
        self._resize(minsize)

    def _add_key(self, key, hash_, is_unique=False):
        """
        Add a key to the set
        """
        self.inserts += 1
        i = self._lookup(key, hash_)
        old = self.table[i]
        if old == hash_:
            if is_unique:
                self._found_hard_collision(key, hash_)
            return
        self.used += 1
        self.table[i] = hash_
        if old is not Empty:
            return
        self.fill += 1
        self._grow()

    def add(self, key, hash_=None):
        """
        Add key in table if not already present.
        """
        is_unique = key not in self._set
        self._set.add(key)
        if hash_ is None:
            hash_ = hash(key)
        self._add_key(key, hash_, is_unique)

    def summary(self):
        print "Inserts = %d" % self.inserts
        print "Entries = %d" % self.used
        print "Physical size = %d" % (self.mask + 1)
        print "Explicit lookups = %d" % self.lookups
        if self.lookups:
            mult = 100.0 / self.lookups
            print "- Table walks = %d (%g %%)" % (self.walks, mult * self.walks)
            print "- Collisions = %d (%g %%)" % (self.collisions, mult * self.collisions)
            print "- Average chain length = %g" % (self.walks * 1.0 / self.lookups - 1)
            print "- Dummy skips = %d (%g %%)" % (self.skips, mult * self.skips)
        print "Resizing lookups = %d" % self.resizing_lookups
        if self.resizing_lookups:
            mult = 100.0 / self.resizing_lookups
            print "- Table walks = %d (%g %%)" % (self.resizing_walks, mult * self.resizing_walks)
        print "Hard collisions = %d" % self.hard_collisions
        print "\n"


def test_populate(values):
    s = plain_set()
    for v in values:
        s.add(v)
    s.summary()

N = len(sys.argv) > 1 and int(sys.argv[1]) or 2000

nodups = [random.randrange(0, sys.maxint) for i in xrange(N)]

print "...no dups...\n"
test_populate(nodups)

# print "...some dups...\n"
# l = [random.randint(0, N/2) for i in xrange(N)]
# test_populate(l)
#
print "...many dups...\n"
# l = [random.randint(0, N/20) for i in xrange(N)]
l = 20 * nodups
test_populate(l)

print "...bad hash distribution...\n"
# l = [(random.randrange(0, sys.maxint) * 4) & sys.maxint for i in xrange(N)]
l = [(i * 4) & sys.maxint for i in nodups]
test_populate(l)

try:
    f = file('/usr/share/dict/words')
except IOError:
    pass
else:
    words = list(f)
    f.close()
    print "...system word list...\n"
    l = random.sample(words, N)
    test_populate(l)
