Three implementations of sets with bits.

From: Pascal Bourguignon (spam_at_thalassa.informatimago.com)
Date: 02/29/04


Date: 29 Feb 2004 06:08:25 +0100


I compared three ways of implementating sets with bits, using
integers, bit vectors, or arrays of small integers.

-----------------------------------------------------------
loops x #bits
                         CLISP CLISP SBCL SBCL programming
                         time(s) space(B) time(s) space(B)
100x65536
  integer(func) 1.79 8480k 0.30 1195k ultra simple
  bvector(func) 1.64 0k 1.36 45892k simple
  bvector(proc) 1.36 360k 1.31 13854k simple
  bset(proc) 0.25 440k 0.03 0k complex
1000x256
  integer(func) 1.24 6344k 0.31 7685k
  bvector(func) 1.27 0k 1.18 35096k
  bvector(proc) 1.02 120k 0.87 8617k
  bset(proc) 0.15 128k 0.21 7850k
10000x32
  integer(func) 30.52 154891k 7.62 194816k
  bvector(func) 32.51 0k 27.94 892479k
  bvector(proc) 26.31 2460k 21.38 212498k
  bset(proc) 3.70 2460k 5.26 198094k
-----------------------------------------------------------

Normalized (by run and by cl implementation):
-----------------------------------------------------------
loops x #bits
                         CLISP CLISP SBCL SBCL
                         time space time space
100x65536
  integer(func) 1.00 1.00 0.22 0.03
  bvector(func) 0.92 0.00 1.00 1.00
  bvector(proc) 0.76 0.04 0.96 0.30
  bset(proc) 0.14 0.05 0.02 0.00
1000x256
  integer(func) 0.98 1.00 0.26 0.22
  bvector(func) 1.00 0.00 1.00 1.00
  bvector(proc) 0.80 0.02 0.74 0.25
  bset(proc) 0.12 0.02 0.18 0.22
10000x32
  integer(func) 0.94 1.00 0.27 0.22
  bvector(func) 1.00 0.00 1.00 1.00
  bvector(proc) 0.81 0.02 0.77 0.24
  bset(proc) 0.11 0.02 0.19 0.22
-----------------------------------------------------------

I find it somewhat depressing that the ultra-simple and elegant
formulation be also the slowest and greediest (on clisp, but the
second nicest implementation is worse on sbcl).

That integer representation of sets could be much more efficient (both
in space and time) if it was possible to modify an integer value.
Must all integer values absolutely be immutable objects?

In the case of the bit vector representation, it could probably be as
efficient as the most efficient if we could manipulate several bits at
once. Is there a way to extract an integer from a bit vector? Or
"displace" bits to an integer?

In any case, I'd suggest the implementations to make a special case in
map and map-into when the function is identity, or one of the log*
family, and when the arguments are bit vectors, to process them words
by words. The speed gain on 32bit architecture would be good enough,
on 64bit it would be impressive!

integer(functional):
--------------------
(defun integer-intersection (p q) (logand p q))
(defun integer-union (p q) (logior p q))
(defun integer-difference (p q) (logandc2 p q))
(defun integer-contains (s e) (logbitp e s))
(defun integer-singleton (e) (dpb 1 (byte 1 e) 0))
(defun integer-include (s e) (dpb 1 (byte 1 e) s))
(defun integer-exclude (s e) (dpb 0 (byte 1 e) s))
(defun integer-cardinal (s) (logcount s))

bitvector(functional):
----------------------
(defun bit-vector-intersection (p q)
  (map '(array bit (*)) (function logand) p q))

(defun bit-vector-union (p q)
  (map '(array bit (*)) (function logior) p q))

(defun bit-vector-difference (p q)
  (map '(array bit (*)) (function logandc2) p q))

(defun bit-vector-contains (v e) (not (zerop (aref v e))))
(defun bit-vector-include (v e) (setf (aref v e) 1))
(defun bit-vector-exclude (v e) (setf (aref v e) 0))

bitvector(procedural):
----------------------
(defun bit-vector-assign-2 (p q) (map-into p (function identity) q))
(defun bit-vector-intersection-2 (p q) (map-into p (function logand) p q))
(defun bit-vector-union-2 (p q) (map-into p (function logior) p q))
(defun bit-vector-difference-2 (p q) (map-into p (function logandc2) p q))

bset(procedural):
-----------------

(defconstant +bit-per-bitset+ 32)
(deftype bitset () `(unsigned-byte ,+bit-per-bitset+))

(defstruct bset
  (bitsets (make-array :type (array bitset *))
  (cardinal nil :type (or null (integer 0)))
  (first-element 0 :type (integer 0)) ;; approximate
  (last-element 0 :type (integer 0)) ;; approximate
  ;; (for all i (==> (< i (bset-first-element bset)) (not (is-element i bset))))
  ;; (for all i (==> (> i (bset-last-element bset)) (not (is-element i bset))))
  );;bset

(proclaim '(inline elem-to-bit))
(defun elem-to-bit (element) (mod element +bit-per-bitset+))
(proclaim '(inline bitset-to-elem))
(defun bitset-to-elem (index) (* +bit-per-bitset+ (1+ index)))

(defun intersection (set1 set2)
  "
DO: set1 := set1 inter set2 inter
         Accumulate in set1 the intersection of set1 and set2
         (elements in set1 and in set2).
RETURN: SET1
"
  (let ((bits1 (bset-bitsets set1))
        (bits2 (bset-bitsets set2)))
    (for (i
         (elem-to-bitset (max (bset-first-element set1)
                              (bset-first-element set2)))
         (elem-to-bitset (min (bset-last-element set1)
                              (bset-last-element set2))))
         (setf (bsref bits1 i) (logand (bsref bits1 i) (bsref bits2 i)))))
  (setf (bset-cardinal set1) nil
        (bset-first-element set1) (max (bset-first-element set1)
                                       (bset-first-element set2))
        (bset-last-element set1) (min (bset-last-element set1)
                                       (bset-last-element set2)))
  set1)

(defun include (bset element)
  "
PRE: (<= 0 element (size bset))
POST: (is-element element bset)
RETURN: BSET
"
  (declare (type (integer 0) element))
  (let ((bits (bset-bitsets bset)))
    (setf (bsref bits (elem-to-bitset element))
          (dpb 1 (byte 1 (elem-to-bit element))
               (bsref bits (elem-to-bitset element)))))
  (setf (bset-cardinal bset) nil
        (bset-first-element bset) (cond
                                   ((is-element 0 bset) 0)
                                   ((zerop (bset-first-element bset)) element)
                                   (t (min element (bset-first-element bset))))
        (bset-last-element bset) (max element (bset-last-element bset)))
  bset)

    

-- 
__Pascal_Bourguignon__                     http://www.informatimago.com/
There is no worse tyranny than to force a man to pay for what he doesn't
want merely because you think it would be good for him.--Robert Heinlein
http://www.theadvocates.org/