# Shared notebook: 2018-02-21-174103.ipynb (Open in CoCalc)
from bitarray import bitarray
import mmh3

class BloomFilter:
    """Fixed-size Bloom filter for strings, backed by a ``bitarray``.

    Every item is hashed ``hash_count`` times with ``mmh3.hash``, using the
    seeds ``0 .. hash_count - 1``.  Each hash value, reduced modulo ``size``,
    selects one bit of the array.  Adding an item sets all of its bits;
    a lookup checks whether all of them are set.
    """

    def __init__(self, size, hash_count):
        """Create an empty filter.

        Args:
            size: Number of bits in the filter; must be at least 1.
            hash_count: Number of hash seeds applied to each item; must be
                at least 1.

        Raises:
            ValueError: If ``size`` or ``hash_count`` is less than 1.
        """
        # Reject a degenerate configuration up front: a zero size would only
        # surface later as a ZeroDivisionError in the modulo below.
        if size < 1:
            raise ValueError("size must be a positive integer")
        if hash_count < 1:
            raise ValueError("hash_count must be a positive integer")
        self.size = size
        self.hash_count = hash_count
        self.bit_array = bitarray(size)
        self.bit_array.setall(0)

    def _positions(self, string):
        """Yield the bit index selected by each seeded hash of ``string``."""
        # range() works on both Python 2 and 3; xrange() exists only on 2.
        for seed in range(self.hash_count):
            # Python's % with a positive modulus never returns a negative
            # number, so any hash value maps into 0 .. size - 1.
            yield mmh3.hash(string, seed) % self.size

    def add(self, string):
        """Record ``string`` in the filter by setting all of its bits."""
        for position in self._positions(string):
            self.bit_array[position] = 1

    def lookup(self, string):
        """Report whether ``string`` may have been added.

        Returns:
            ``"Not here"`` as soon as one of the item's bits is unset,
            otherwise ``"Very likely"``.
        """
        for position in self._positions(string):
            if self.bit_array[position] == 0:
                return "Not here"
        return "Very likely"

# Demo: a 500000-bit filter using 7 hash seeds per word.
bf = BloomFilter(500000, 7)

# Read the word list inside a context manager so the file handle is closed
# as soon as its contents have been read, instead of being left open.
with open("/usr/share/dict/american-english") as word_file:
    lines = word_file.read().splitlines()
for line in lines:
    bf.add(line)

# Probe the filter with a few sample strings and print each verdict.
for word in ("google", "Max", "mice", "3"):
    print(bf.lookup(word))
# ---------------------------------------------------------------------------
# ImportError                               Traceback (most recent call last)
# <ipython-input-3-35487c9a841b> in <module>()
#       1 from bitarray import bitarray
# ----> 2 import mmh3
#       3
#       4 class BloomFilter:
#       5
#
# ImportError: No module named 'mmh3'
pip install murmurhash3
# The following command must be run outside of the IPython shell:
#
#     $ pip install murmurhash3
#
# The Python package manager (pip) can only be used from outside of IPython.
# Please reissue the `pip` command in a separate terminal or command prompt.
#
# See the Python documentation for more information on how to install packages:
#
#     https://docs.python.org/3/installing/