Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/misc/bitset.pxi
4054 views
"""
A fast bitset datatype in Cython.

Operations between bitsets are only guaranteed to work if the bitsets
have the same size, with the exception of ``bitset_realloc``.  Similarly, you
should not try to access elements of a bitset beyond the size.
"""

#*****************************************************************************
#     Copyright (C) 2008 Robert Bradshaw <[email protected]>
#
#  Distributed under the terms of the GNU General Public License (GPL)
#
#    This code is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
#
#  The full text of the GPL is available at:
#
#                  http://www.gnu.org/licenses/
#*****************************************************************************


# Doctests for the functions in this file are in misc_c.pyx


#############################################################################
# Bitset Initalization
#############################################################################

cdef inline bint bitset_init(bitset_t bits, unsigned long size) except -1:
    """
    Allocates a bitset of size size.  The set will probably be filled
    with random elements.

    size must be at least 1
    """
    bits.size = size
    bits.limbs = (size - 1)/(8*sizeof(unsigned long)) + 1
    bits.bits = <unsigned long*>sage_malloc(bits.limbs * sizeof(unsigned long))
    if bits.bits == NULL:
        raise MemoryError
    bits.bits[bits.limbs-1] = 0 #Set entries beyond the end of the set to zero.
    
cdef inline bint bitset_realloc(bitset_t bits, unsigned long size) except -1:
    """
    Reallocates a bitset to size size.  If reallocation is larger, new bitset
    does not contain any of the extra bits.
    """
    cdef unsigned long limbs_old = bits.limbs
    cdef unsigned long size_old = bits.size
    if size_old == size: return 0
    bits.limbs = (size - 1)/(8*sizeof(unsigned long)) + 1
    tmp = <unsigned long*>sage_realloc(bits.bits, bits.limbs * sizeof(unsigned long))
    if tmp != NULL:
        bits.bits = tmp
    else:
        bits.limbs = limbs_old
        raise MemoryError
    bits.size = size
    if size_old < size:
        bits.bits[size_old >> index_shift] &= ((<unsigned long>1 << (size_old & offset_mask)) - 1)
        memset(bits.bits + (size_old >> index_shift) + 1, 0, (bits.limbs - (size_old >> index_shift) - 1) * sizeof(unsigned long))

cdef inline void bitset_free(bitset_t bits):
    """
    Deallocates the memory in bits.
    """
    sage_free(bits.bits)
    
cdef inline void bitset_clear(bitset_t bits):
    """
    Removes all elements from the set
    """
    memset(bits.bits, 0, bits.limbs * sizeof(unsigned long))
      
cdef inline void bitset_zero(bitset_t bits):
    """
    Removes all elements from the set
    
    This function is the same as bitset_clear(bits).
    """
    bitset_clear(bits)

cdef inline void bitset_copy(bitset_t dst, bitset_t src):
    """
    Copy the bitset src over the bitset dst, overwriting dst.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    memcpy(dst.bits, src.bits, dst.limbs * sizeof(unsigned long))
    
#############################################################################
# Bitset Comparison
#############################################################################

cdef inline bint bitset_isempty(bitset_t bits):
    """
    Tests whether bits is empty.  Returns True (i.e., 1) if the set is
    empty, False (i.e., 0) otherwise.
    """
    cdef long i
    for i from 0 <= i < bits.limbs:
        if bits.bits[i] != 0:
            return False
    return True
    
cdef inline bint bitset_is_zero(bitset_t bits):
    """
    Tests whether bits is empty (i.e., zero).  Returns True (1) if
    the set is empty, False (0) otherwise.
    
    This function is the same as bitset_is_empty(bits).
    """
    return bitset_isempty(bits)

cdef inline bint bitset_eq(bitset_t a, bitset_t b):
    """
    Compares bitset a and b.  Returns True (i.e., 1) if the sets are
    equal, and False (i.e., 0) otherwise.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    return memcmp(a.bits, b.bits, a.limbs * sizeof(unsigned long)) == 0
    
cdef inline int bitset_cmp(bitset_t a, bitset_t b):
    """
    Compares bitsets a and b.  Returns 0 if the two sets are
    identical, and consistently returns -1 or 1 for two sets that are
    not equal.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from a.limbs > i >= 0:
        if a.bits[i] != b.bits[i]:
            if a.bits[i] < b.bits[i]:
                return -1
            else:
                return 1
    return 0

cdef inline bint bitset_issubset(bitset_t a, bitset_t b):
    """
    Test whether a is a subset of b (i.e., every element in a is also
    in b).

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < a.limbs:
        if (a.bits[i] & ~b.bits[i]) != 0:
            return False
    return True

cdef inline bint bitset_issuperset(bitset_t a, bitset_t b):
    """
    Test whether a is a superset of b (i.e., every element in b is also
    in a).

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    return bitset_issubset(b, a)



#############################################################################
# Bitset Bit Manipulation
#############################################################################

cdef inline bint bitset_in(bitset_t bits, unsigned long n):
    """
    Checks if n is in bits.  Returns True (i.e., 1) if n is in the
    set, False (i.e., 0) otherwise.
    """
    return (bits.bits[n >> index_shift] >> (n & offset_mask)) & 1

cdef inline bint bitset_check(bitset_t bits, unsigned long n):
    """
    Checks if n is in bits.  Returns True (i.e., 1) if n is in the
    set, False (i.e., 0) otherwise.

    This function is the same as bitset_in(bits, n).
    """
    return bitset_in(bits, n)

cdef inline bint bitset_not_in(bitset_t bits, unsigned long n):
    """
    Checks if n is not in bits.  Returns True (i.e., 1) if n is not in the
    set, False (i.e., 0) otherwise.
    """
    return not bitset_in(bits, n)


cdef inline bint bitset_remove(bitset_t bits, unsigned long n) except -1:
    """
    Removes n from bits.  Raises KeyError if n is not contained in bits.
    """
    if not bitset_in(bits, n):
        raise KeyError, n
    bitset_discard(bits, n)

cdef inline void bitset_discard(bitset_t bits, unsigned long n):
    """
    Removes n from bits.
    """
    bits.bits[n >> index_shift] &= ~((<unsigned long>1) << (n & offset_mask))

cdef inline void bitset_unset(bitset_t bits, unsigned long n):
    """
    Removes n from bits.

    This function is the same as bitset_discard(bits, n).
    """
    bitset_discard(bits, n)


cdef inline void bitset_add(bitset_t bits, unsigned long n):
    """
    Adds n to bits.
    """
    bits.bits[n >> index_shift] |= (<unsigned long>1) << (n & offset_mask)

cdef inline void bitset_set(bitset_t bits, unsigned long n):
    """
    Adds n to bits.

    This function is the same as bitset_add(bits, n).
    """
    bitset_add(bits, n)
    

cdef inline void bitset_set_to(bitset_t bits, unsigned long n, bint b):
    """
    If b is True, adds n to bits.  If b is False, removes n from bits.
    """
    bitset_unset(bits, n)
    bits.bits[n >> index_shift] |= (<unsigned long>b) << (n & offset_mask)
    
cdef inline void bitset_flip(bitset_t bits, unsigned long n):
    """
    If n is in bits, removes n from bits.  If n is not in bits, add n
    to bits.
    """
    bits.bits[n >> index_shift] ^= (<unsigned long>1) << (n & offset_mask)
    
cdef inline void bitset_set_first_n(bitset_t bits, unsigned long n):
    """
    Sets exactly the first n bits.
    """
    cdef unsigned long i
    cdef long index = n >> index_shift
    for i from 0 <= i < index:
        bits.bits[i] = ~0
    if index < bits.limbs:
        bits.bits[index] = ((<unsigned long>1) << (n & offset_mask)) - 1
    for i from index < i < bits.limbs:
        bits.bits[i] = 0

#############################################################################
# Bitset Searching
#############################################################################

cdef inline long _bitset_first_in_limb(unsigned long limb):
    """
    Given a limb of a bitset, return the index of the first nonzero
    bit.  If there are no bits set in the limb, return -1.
    """
    cdef long j
    # First, we see if the first half of the limb is zero or not
    if limb & (((<unsigned long>1) << 4*sizeof(unsigned long)) - 1):
        for j from 0 <= j < 4*sizeof(unsigned long):
            if limb & ((<unsigned long>1) << j):
                return j
    else:
        # First half of the limb is zero, so first nonzero bit is in
        # the second half.
        for j from 4*sizeof(unsigned long) <= j < 8*sizeof(unsigned long):
            if limb & ((<unsigned long>1) << j):
                return j
    return -1

#cdef inline long _bitset_first_in_limb_complement(unsigned long limb):
#    """
#    Given a limb of a bitset, return the index of the first zero
#    bit.  If there are no bits set in the limb, return -1.
#    """
#    cdef long j
#    # First, we see if the first half of the limb is all 1 or not
#    if ~limb & (((<unsigned long>1) << 4*sizeof(unsigned long)) - 1):
#        for j from 0 <= j < 4*sizeof(unsigned long):
#            if not limb & ((<unsigned long>1) << j):
#                return j
#    else:
#        # First half of the limb is zero, so first nonzero bit is in
#        # the second half.
#        for j from 4*sizeof(unsigned long) <= j < 8*sizeof(unsigned long):
#            if not limb & ((<unsigned long>1) << j):
#                return j
#    return -1

cdef inline long bitset_first(bitset_t a):
    """
    Calculate the index of the first element in the set.  If the set
    is empty, returns -1.
    """
    cdef long i
    for i from 0 <= i < a.limbs:
        if a.bits[i]:
            return (i << index_shift) | _bitset_first_in_limb(a.bits[i])
    return -1

cdef inline long bitset_first_in_complement(bitset_t a):
    """
    Calculate the index of the first element not in the set.  If the set
    is full, returns -1.
    """
    cdef long i, j = -1
    for i from 0 <= i < a.limbs:
        if ~a.bits[i]:
            j = (i << index_shift) | _bitset_first_in_limb(~a.bits[i])
            break
    if j >= a.size: j = -1
    return j

cdef inline long bitset_pop(bitset_t a) except -1:
    """
    Remove and return an arbitrary element from the set. Raises
    KeyError if the set is empty.
    """
    cdef long i = bitset_first(a)
    if i == -1:
        raise KeyError, 'pop from an empty set'
    bitset_discard(a, i)
    return i
    
cdef inline long bitset_first_diff(bitset_t a, bitset_t b):
    """
    Calculate the index of the first difference between a and b.  If a
    and b are equal, then return -1.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < a.limbs:
        if a.bits[i] != b.bits[i]:
            return (i << index_shift) | _bitset_first_in_limb(a.bits[i] ^ b.bits[i])
    return -1
    
cdef inline long bitset_next(bitset_t a, long n):
    """
    Calculate the index of the next element in the set, starting at
    (and including) n.  Return -1 if there are no elements from n
    onwards.
    """
    if n >= a.size:
        return -1
    cdef long i
    cdef unsigned long limb = a.bits[n >> index_shift] & ~(((<unsigned long>1) << (n & offset_mask)) - 1)
    cdef long ret = _bitset_first_in_limb(limb)
    if ret != -1:
        return (n & ~offset_mask) | ret
    for i from (n >> index_shift) < i < a.limbs:
        if a.bits[i]:
            return (i << index_shift) | _bitset_first_in_limb(a.bits[i])
    return -1
    
cdef inline long bitset_next_diff(bitset_t a, bitset_t b, long n):
    """
    Calculate the index of the next element that differs between a and
    b, starting at (and including) n.  Return -1 if there are no
    elements differing between a and b from n onwards.
    
    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    if n >= a.size:
        return -1
    cdef long i
    cdef long limb = (a.bits[n >> index_shift] ^ b.bits[n >> index_shift]) \
        & ~(((<unsigned long>1) << (n & offset_mask)) - 1)
    cdef long ret = _bitset_first_in_limb(limb)
    if ret != -1:
        return (n & ~offset_mask) | ret
    for i from (n >> index_shift) < i < a.limbs:
        if a.bits[i] != b.bits[i]:
            return (i << index_shift) | _bitset_first_in_limb(a.bits[i] ^ b.bits[i])
    return -1



cdef inline long bitset_len(bitset_t bits):
    """
    Calculate the number of items in the set (i.e., the number of nonzero bits).
    """
    cdef long len = 0
    cdef long i = bitset_first(bits)
    while i>=0:
        len += 1
        i=bitset_next(bits, i+1)
    return len

cdef inline long bitset_hash(bitset_t bits):
    """
    Calculate a (very naive) hash function.

    This function should not depend on the size of the bitset, only on
    the items in the bitset.
    """
    cdef unsigned long hash = 0
    cdef long i
    for i from 0 <= i < bits.limbs:
        hash ^= bits.bits[i]
    return hash
    
#############################################################################
# Bitset Arithmetic
#############################################################################

cdef inline void bitset_complement(bitset_t r, bitset_t a):
    """
    Set r to be the complement of a, overwriting r.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < r.limbs:
        r.bits[i] = ~a.bits[i]
    if r.size & offset_mask != 0:
        # If the last limb has extra bits beyond the size of r,
        # then zero out those extra bits
        r.bits[r.limbs-1] &= ((<unsigned long>1) << ((r.size) & offset_mask)) - 1
    
cdef inline void bitset_not(bitset_t r, bitset_t a):
    """
    Set r to be the complement of a, overwriting r.

    We assume the two sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.

    This function is the same as bitset_complement(r, a).
    """
    bitset_complement(r, a)

cdef inline void bitset_intersection(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the intersection of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < r.limbs:
        r.bits[i] = a.bits[i] & b.bits[i]
        

cdef inline void bitset_and(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the intersection of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.

    This function is the same as bitset_intersection(r, a, b).
    """
    bitset_intersection(r, a, b)

cdef inline void bitset_union(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the union of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < r.limbs:
        r.bits[i] = a.bits[i] | b.bits[i]

cdef inline void bitset_or(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the union of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    
    This function is the same as bitset_union(r, a, b).
    """
    bitset_union(r, a, b)

cdef inline void bitset_difference(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the difference of a and b (i.e., things in a that are not
    in b), overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < r.limbs:
        r.bits[i] = a.bits[i] & ~b.bits[i]

cdef inline void bitset_symmetric_difference(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the symmetric difference of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    """
    cdef long i
    for i from 0 <= i < r.limbs:
        r.bits[i] = a.bits[i] ^ b.bits[i]

cdef inline void bitset_xor(bitset_t r, bitset_t a, bitset_t b):
    """
    Set r to the symmetric difference of a and b, overwriting r.

    We assume the three sets have the same size.  Otherwise, the
    behavior of this function is undefined and the function may
    segfault.
    
    This function is the same as bitset_symmetric_difference(r, a, b).
    """
    bitset_symmetric_difference(r, a, b)

cdef void bitset_rshift(bitset_t r, bitset_t a, long n):
    if n <= 0:
        if n != 0:
            bitset_lshift(r, a, -n)
        else:
            bitset_copy(r, a)
        return
    elif n >= a.size:
        bitset_clear(r)
        return
    cdef long i
    cdef long off = n >> index_shift
    cdef int shift = offset_mask & n
    cdef int shift2 = 8*sizeof(unsigned long) - shift
    if shift == 0:
        for i from 0 <= i < r.limbs - off:
            r.bits[i] = a.bits[i+off]

    else:
        for i from 0 <= i < r.limbs - off - 1:
            r.bits[i] = (a.bits[i+off] >> shift) | (a.bits[i+off+1] << shift2)
        r.bits[r.limbs - off - 1] = a.bits[r.limbs - 1] >> shift
    
    if off > 0:
        memset(r.bits + r.limbs - off, 0, off * sizeof(unsigned long))
        
cdef void bitset_lshift(bitset_t r, bitset_t  a, long n):
    if n <= 0:
        if n != 0:
            bitset_rshift(r, a, -n)
        else:
            bitset_copy(r, a)
        return
    elif n >= a.size:
        bitset_clear(r)
        return
    cdef long i
    cdef long off = n >> index_shift
    cdef int shift = offset_mask & n
    cdef int shift2 = 8*sizeof(unsigned long) - shift
    if shift == 0:
        for i from r.limbs - off > i >= 0:
            r.bits[i+off] = a.bits[i]
            
    else:
        for i from r.limbs - off > i >= 1:
            r.bits[i+off] = (a.bits[i] << shift) | (a.bits[i-1] >> shift2)
        r.bits[off] = a.bits[0] << shift

    if off > 0:
        memset(r.bits, 0, off * sizeof(unsigned long))

#############################################################################
# Hamming Weights
#############################################################################

cdef enum:
    _bitset_hamming_table_bits = 8

cdef int _bitset_hamming_table[1 << _bitset_hamming_table_bits]

cdef void _bitset_fill_hamming_table():
    cdef int i, j
    for i from 0 <= i < (1 << _bitset_hamming_table_bits):
        _bitset_hamming_table[i] = 0
        for j from 0 <= j < _bitset_hamming_table_bits:
            _bitset_hamming_table[i] += (i >> j) & 1
    
_bitset_fill_hamming_table()

cdef inline int bitset_hamming_weight(bitset_t a):
    cdef long i, j
    cdef long w = 0
    cdef unsigned long limb
    for i from 0 <= i < a.limbs:
        limb = a.bits[i]
        for j from 0 <= j < (8*sizeof(unsigned long) + _bitset_hamming_table_bits - 1) / _bitset_hamming_table_bits:
            w += _bitset_hamming_table[(limb >> (j*_bitset_hamming_table_bits)) & ((1 << _bitset_hamming_table_bits) - 1)]
    return w

cdef inline long bitset_hamming_weight_sparse(bitset_t a):
    cdef long i
    cdef long w = 0
    cdef unsigned long limb
    for i from 0 <= i < a.limbs:
        limb = a.bits[i]
        while limb:
            w += _bitset_hamming_table[limb & ((1 << _bitset_hamming_table_bits) - 1)]
            limb = limb >> _bitset_hamming_table_bits
    return w
        
#############################################################################
# Bitset Conversion
#############################################################################

cdef char* bitset_chars(char* s, bitset_t bits, char zero=c'0', char one=c'1'):
    """
    Return a string representation of the bitset in s, using zero for
    the character representing the items not in the bitset and one for
    the character representing the items in the bitset.

    The string is both stored in s and returned.  If s is NULL, then a
    new string is allocated.
    """
    cdef long i
    if s == NULL:
        s = <char *>sage_malloc(bits.size+1)
    for i from 0 <= i < bits.size:
        s[i] = one if bitset_in(bits, i) else zero
    s[bits.size] = 0
    return s
    
cdef void bitset_from_str(bitset_t bits, char* s, char zero=c'0', char one=c'1'):
    """
    Initialize a bitset with a set derived from the character string
    s, where one represents the character indicating set membership.
    """
    bitset_init(bits, strlen(s))
    cdef long i
    for i from 0 <= i < bits.size:
        bitset_set_to(bits, i, s[i] == one)

cdef bitset_string(bitset_t bits):
    """
    Return a python string representing the bitset.
    """
    cdef char* s = bitset_chars(NULL, bits)
    cdef object py_s
    py_s = s
    sage_free(s)
    return py_s

cdef list bitset_list(bitset_t bits):
    """
    Return a list of elements in the bitset.
    """
    cdef list elts = []
    cdef long elt = bitset_first(bits)
    while elt >= 0:
        elts.append(elt)
        elt = bitset_next(bits, elt+1)
    return elts
    


#############################################################################
# Aliases for functions
#############################################################################