SDSL: Succinct Data Structure Library: sdsl/include/sdsl/bit_vector

Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2012 Simon Gog, Matthias Petri
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00022 #ifndef SDSL_BIT_VECTOR_INTERLEAVED
00023 #define SDSL_BIT_VECTOR_INTERLEAVED
00024 
00025 #include "int_vector.hpp"
00026 #include "bitmagic.hpp"
00027 #include "util.hpp"
00028 
00029 #include <queue>
00030 
00032 namespace sdsl
00033 {
00034 
00035 template<uint8_t b=1,uint32_t blockSize=512>// forward declaration needed for friend declaration
00036 class rank_support_interleaved;  // in bit_vector_interleaved
00037 
00038 template<uint8_t b=1,uint32_t blockSize=512>// forward declaration needed for friend declaration
00039 class select_support_interleaved;  // in bit_vector_interleaved
00040 
00042 
00049 template<uint32_t blockSize=512>
00050 class bit_vector_interleaved
00051 {
00052     public:
00053         typedef bit_vector::size_type   size_type;
00054         typedef size_type               value_type;
00055 
00056         friend class rank_support_interleaved<1,blockSize>;
00057         friend class rank_support_interleaved<0,blockSize>;
00058         friend class select_support_interleaved<1,blockSize>;
00059         friend class select_support_interleaved<0,blockSize>;
00060 
00061         typedef rank_support_interleaved<1,blockSize>     rank_1_type;
00062         typedef rank_support_interleaved<0,blockSize>     rank_0_type;
00063         typedef select_support_interleaved<1,blockSize> select_1_type;
00064         typedef select_support_interleaved<0,blockSize> select_0_type;
00065     private:
00066         size_type m_size;           /* size of the original bit vector */
00067         size_type m_totalBlocks;    /* total size of m_data in u64s */
00068         size_type m_superblocks;    /* number of superblocks */
00069         size_type m_blockShift;
00070         int_vector<64> m_data;           /* data */
00071         int_vector<64> m_rank_samples;   /* space for additional rank samples */
00072 
00073         // precondition: m_rank_samples.size() <= m_superblocks
00074         void init_rank_samples() {
00075             uint32_t blockSize_U64 = bit_magic::l1BP(blockSize>>6);
00076             size_type idx = 0;
00077             std::queue<size_type> lbs, rbs;
00078             lbs.push(0); rbs.push(m_superblocks);
00079             while (!lbs.empty()) {
00080                 size_type lb = lbs.front(); lbs.pop();
00081                 size_type rb = rbs.front(); rbs.pop();
00082                 if (/*lb < rb and*/ idx < m_rank_samples.size()) {
00083                     size_type mid = lb + (rb-lb)/2; // select mid \in [lb..rb)
00084                     size_type pos = (mid << blockSize_U64) + mid;
00085                     m_rank_samples[ idx++ ] = m_data[pos];
00086                     lbs.push(lb); rbs.push(mid);
00087                     lbs.push(mid+1); rbs.push(rb);
00088                 }
00089             }
00090         }
00091 
00092     public:
00093         bit_vector_interleaved():m_size(0), m_totalBlocks(0), m_superblocks(0), m_blockShift(0) {}
00094 
00095         bit_vector_interleaved(const bit_vector& bv):m_size(0), m_totalBlocks(0), m_superblocks(0), m_blockShift(0) {
00096             m_size = bv.size();
00097             /* calculate the number of superblocks */
00098 //          each block of size > 0 gets suberblock in which we store the cumulative sum up to this block
00099             m_superblocks = (m_size+blockSize) / blockSize;
00100             m_blockShift = bit_magic::l1BP(blockSize);
00101             /* allocate new data */
00102             size_type blocks = (m_size+64)/64;
00103             size_type mem =  blocks +         m_superblocks + 1;
00104 //                          ^^^^^^^^^^^^^^^   ^^^^^^^^^^^^^   ^
00105 //                          bit vector data | cum. sum data | sum after last block
00106             util::assign(m_data, int_vector<64>(mem));
00107             m_totalBlocks = mem;
00108 
00109             /* assign data and calculate super block values */
00110             const uint64_t* bvp = bv.data();
00111 
00112             size_type j = 0; // 64-bit word counter in the m_data
00113             size_type cum_sum = 0;
00114             size_type sample_rate = blockSize/64;
00115             for (size_type i=0, sample_cnt=sample_rate; i < blocks; ++i, ++sample_cnt) {
00116                 if (sample_cnt == sample_rate) {
00117                     m_data[j] = cum_sum;
00118                     sample_cnt = 0;
00119                     j++;
00120                 }
00121                 m_data[j] = bvp[i];
00122                 cum_sum += bit_magic::b1Cnt(m_data[j]);
00123                 j++;
00124             }
00125             m_data[j] = cum_sum; /* last superblock so we can always
00126                                     get num_ones fast */
00127             if (m_totalBlocks > 1024*64) {
00128                 // we store at most m_superblocks+1 rank_samples:
00129                 // we do a cache efficient binary search for the select on X=1024
00130                 // or X=the smallest power of two smaller than m_superblock
00131                 m_rank_samples.resize(std::min(1024ULL, 1ULL << bit_magic::l1BP(m_superblocks)));
00132             }
00133             init_rank_samples();
00134         }
00135 
00137 
00142         value_type operator[](size_type i)const {
00143             size_type bs = i >> m_blockShift;
00144             size_type block = bs + (i>>6) + 1;
00145             return ((m_data[block] >> (i&63)) & 1ULL);
00146         }
00147 
00149         size_type size()const {
00150             return m_size;
00151         }
00152 
00154         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00155             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00156             size_type written_bytes = 0;
00157             written_bytes += util::write_member(m_size, out, child, "size");
00158             written_bytes += util::write_member(m_totalBlocks, out, child, "totalBlocks");
00159             written_bytes += util::write_member(m_superblocks, out, child, "superblocks");
00160             written_bytes += util::write_member(m_blockShift, out, child, "blockShift");
00161             written_bytes += m_data.serialize(out, child, "data");
00162             written_bytes += m_rank_samples.serialize(out, child, "rank_samples");
00163             structure_tree::add_size(child, written_bytes);
00164             return written_bytes;
00165         }
00166 
00168         void load(std::istream& in) {
00169             util::read_member(m_size, in);
00170             util::read_member(m_totalBlocks, in);
00171             util::read_member(m_superblocks, in);
00172             util::read_member(m_blockShift, in);
00173             m_data.load(in);
00174             m_rank_samples.load(in);
00175         }
00176 
00177         void swap(bit_vector_interleaved& bv) {
00178             if (this != &bv) {
00179                 std::swap(m_size, bv.m_size);
00180                 std::swap(m_totalBlocks, bv.m_totalBlocks);
00181                 std::swap(m_superblocks, bv.m_superblocks);
00182                 std::swap(m_blockShift, bv.m_blockShift);
00183                 m_data.swap(bv.m_data);
00184                 m_rank_samples.swap(bv.m_rank_samples);
00185             }
00186         }
00187 };
00188 
00189 template<uint8_t b,uint32_t blockSize>
00190 class rank_support_interleaved
00191 {
00192     public:
00193         typedef bit_vector::size_type                   size_type;
00194         typedef bit_vector_interleaved<blockSize>       bit_vector_type;
00195     private:
00196         const bit_vector_type* m_v;
00197         size_type m_blockShift;
00198         size_type m_blockMask;
00199         size_type m_blockSize_U64;  /* blocksize for superblocks in 64 bit words */
00200 
00201         inline size_type rank1(size_type i) const {
00202             size_type SBlockNum = i >> m_blockShift;
00203             size_type SBlockPos = (SBlockNum << m_blockSize_U64) + SBlockNum;
00204             uint64_t resp = m_v->m_data[SBlockPos];
00205             const uint64_t* B = (m_v->m_data.data() + (SBlockPos+1));
00206             uint64_t rem = i&63;
00207             uint64_t bits = (i&m_blockMask) - rem;
00208             while (bits) {
00209                 resp += bit_magic::b1Cnt(*B++);
00210                 bits -= 64;
00211             }
00212             resp += bit_magic::b1Cnt(*B & bit_magic::Li1Mask[rem]);
00213             return resp;
00214         }
00215 
00216 
00217         inline size_type rank0(size_type i) const {
00218             size_type SBlockNum = i >> m_blockShift;
00219             size_type SBlockPos = (SBlockNum << m_blockSize_U64) + SBlockNum;
00220             uint64_t resp = (SBlockNum << m_blockShift) - m_v->m_data[SBlockPos];
00221             const uint64_t* B = (m_v->m_data.data() + (SBlockPos+1));
00222             uint64_t rem = i&63;
00223             uint64_t bits = (i&m_blockMask) - rem;
00224             while (bits) {
00225                 resp += bit_magic::b1Cnt(~(*B)); B++;
00226                 bits -= 64;
00227             }
00228             resp += bit_magic::b1Cnt((~(*B)) & bit_magic::Li1Mask[rem]);
00229             return resp;
00230         }
00231 
00232     public:
00233 
00234         rank_support_interleaved(const bit_vector_type* v=NULL) {
00235             init(v);
00236             m_blockShift = bit_magic::l1BP(blockSize);
00237             m_blockMask = blockSize - 1;
00238             m_blockSize_U64 = bit_magic::l1BP(blockSize>>6);
00239         }
00240 
00241         void init(const bit_vector_type* v=NULL) {
00242             set_vector(v);
00243         }
00244 
00246         size_type rank(size_type i) const {
00247             if (b) return rank1(i);
00248             return rank0(i);
00249         }
00250 
00251         const size_type operator()(size_type i)const {
00252             return rank(i);
00253         }
00254 
00255         const size_type size()const {
00256             return m_v->size();
00257         }
00258 
00259         void set_vector(const bit_vector_type* v=NULL) {
00260             m_v = v;
00261         }
00262 
00263         rank_support_interleaved& operator=(const rank_support_interleaved& rs) {
00264             if (this != &rs) {
00265                 set_vector(rs.m_v);
00266             }
00267             return *this;
00268         }
00269 
00270         void swap(rank_support_interleaved& rs) { }
00271 
00272         bool operator==(const rank_support_interleaved& ss)const {
00273             if (this == &ss)
00274                 return true;
00275             return ss.m_v == m_v;
00276         }
00277 
00278         bool operator!=(const rank_support_interleaved& rs)const {
00279             return !(*this == rs);
00280         }
00281 
00282 
00283         void load(std::istream& in, const bit_vector_type* v=NULL) {
00284             set_vector(v);
00285         }
00286 
00287         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00288             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00289             size_type written_bytes = 0;
00290             structure_tree::add_size(child, written_bytes);
00291             return written_bytes;
00292         }
00293 };
00294 
00295 
00296 template<uint8_t b,uint32_t blockSize>
00297 class select_support_interleaved
00298 {
00299     public:
00300         typedef bit_vector::size_type size_type;
00301         typedef bit_vector_interleaved<blockSize> bit_vector_type;
00302     private:
00303         const bit_vector_type* m_v;
00304         size_type m_superblocks;    /* number of superblocks */
00305         size_type m_blockShift;
00306         size_type m_blockSize_U64;
00307 
00308 
00310         size_type select1(size_type i) const {
00311             size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
00312             size_type res = 0;
00313             size_type idx = 0; // index in m_rank_samples
00314             /* binary search over super blocks */
00315             // invariant: lb==0 or m_data[ pos(lb-1) ] < i
00316             //            m_data[ pos(rb) ] >= i, initial since i < rank(size())
00317             while (lb < rb) {
00318                 size_type mid = (lb+rb)/2; // select mid \in [lb..rb)
00319                 if (idx < m_v->m_rank_samples.size()) {
00320                     if (m_v->m_rank_samples[idx] >= i) {
00321                         idx = (idx<<1) + 1;
00322                         rb = mid;
00323                     } else {
00324                         idx = (idx<<1) + 2;
00325                         lb = mid + 1;
00326                     }
00327                 } else {
00328                     size_type pos = (mid << m_blockSize_U64) + mid;
00329 //                                  ^^^^^^^^^^^^^^^^^^^^^^     ^^^^^^^^^^^^^^^^^^^
00330 //                                    data blocks to jump      superblock position
00331                     if (m_v->m_data[pos] >= i) {
00332                         rb = mid;
00333                     } else {
00334                         lb = mid + 1;
00335                     }
00336                 }
00337             }
00338             res = (rb-1) << m_blockShift;
00339             /* iterate in 64 bit steps */
00340             const uint64_t* w = m_v->m_data.data() + ((rb-1) << m_blockSize_U64) + (rb-1);
00341             i -= *w;  // subtract the cumulative sum before the superblock
00342             ++w; /* step into the data */
00343             size_type ones = bit_magic::b1Cnt(*w);
00344             while (ones < i) {
00345                 i -= ones; ++w;
00346                 ones = bit_magic::b1Cnt(*w);
00347                 res += 64;
00348             }
00349             /* handle last word */
00350             res += bit_magic::i1BP(*w, i);
00351             return res;
00352         }
00353 
00355         size_type select0(size_type i)const {
00356             size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
00357             size_type res = 0;
00358             size_type idx = 0; // index in m_rank_samples
00359             /* binary search over super blocks */
00360             // invariant: lb==0 or m_data[ pos(lb-1) ] < i
00361             //            m_data[ pos(rb) ] >= i, initial since i < rank(size())
00362             while (lb < rb) {
00363                 size_type mid = (lb+rb)/2; // select mid \in [lb..rb)
00364                 if (idx < m_v->m_rank_samples.size()) {
00365                     if (((mid << m_blockShift) - m_v->m_rank_samples[idx]) >= i) {
00366                         idx = (idx<<1) + 1;
00367                         rb = mid;
00368                     } else {
00369                         idx = (idx<<1) + 2;
00370                         lb = mid + 1;
00371                     }
00372                 } else {
00373                     size_type pos = (mid << m_blockSize_U64) + mid;
00374 //                                  ^^^^^^^^^^^^^^^^^^^^^^     ^^^^^^^^^^^^^^^^^^^
00375 //                                    data blocks to jump      superblock position
00376                     if (((mid << m_blockShift) - m_v->m_data[pos]) >= i) {
00377                         rb = mid;
00378                     } else {
00379                         lb = mid + 1;
00380                     }
00381                 }
00382             }
00383             res = (rb-1) << m_blockShift;
00384 
00385             /* iterate in 64 bit steps */
00386             const uint64_t* w = m_v->m_data.data() + ((rb-1) << m_blockSize_U64) + (rb-1);
00387             i = i - (res - *w);  // substract the cumulative sum before the superblock
00388             ++w; /* step into the data */
00389             size_type zeros = bit_magic::b1Cnt(~ *w);
00390             while (zeros < i) {
00391                 i -= zeros; ++w;
00392                 zeros = bit_magic::b1Cnt(~ *w);
00393                 res += 64;
00394             }
00395             /* handle last word */
00396             res += bit_magic::i1BP(~ *w, i);
00397             return res;
00398         }
00399 
00400     public:
00401 
00402         select_support_interleaved(const bit_vector_type* v=NULL) {
00403             init(v);
00404         }
00405 
00406 
00407         void init(const bit_vector_type* v=NULL) {
00408             set_vector(v);
00409             m_blockShift = bit_magic::l1BP(blockSize);
00410             m_blockSize_U64 = bit_magic::l1BP(blockSize>>6);
00411         }
00412 
00414         size_type select(size_type i) const {
00415             if (b) return select1(i);
00416             return select0(i);
00417         }
00418 
00419         const size_type operator()(size_type i)const {
00420             return select(i);
00421         }
00422 
00423         const size_type size()const {
00424             return m_v->size();
00425         }
00426 
00427         void set_vector(const bit_vector_type* v=NULL) {
00428             m_v = v;
00429         }
00430 
00431         select_support_interleaved& operator=(const select_support_interleaved& rs) {
00432             if (this != &rs) {
00433                 set_vector(rs.m_v);
00434             }
00435             return *this;
00436         }
00437 
00438         void swap(select_support_interleaved& rs) { }
00439 
00440         bool operator==(const select_support_interleaved& ss)const {
00441             if (this == &ss)
00442                 return true;
00443             return ss.m_v == m_v;
00444         }
00445 
00446         bool operator!=(const select_support_interleaved& rs)const {
00447             return !(*this == rs);
00448         }
00449 
00450 
00451         void load(std::istream& in, const bit_vector_type* v=NULL) {
00452             set_vector(v);
00453         }
00454 
00455         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00456             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00457             size_type written_bytes = 0;
00458             structure_tree::add_size(child, written_bytes);
00459             return written_bytes;
00460         }
00461 };
00462 
00463 }
00464 
00465 #endif