SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/sd_vector.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2012 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00022 #ifndef INCLUDED_SDSL_SD_VECTOR
00023 #define INCLUDED_SDSL_SD_VECTOR
00024 
00025 #include "int_vector.hpp"
00026 #include "select_support_mcl.hpp"
00027 #include "util.hpp"
00028 #include "bitmagic.hpp"
00029 
00031 namespace sdsl
00032 {
00033 
00034 // forward declaration needed for friend declaration
00035 template<class hi_bit_vector_type = bit_vector,
00036          class Select1Support     = typename hi_bit_vector_type::select_1_type,
00037          class Select0Support     = typename hi_bit_vector_type::select_0_type>
00038 class sd_rank_support;  // in sd_vector
00039 
00040 // forward declaration needed for friend declaration
00041 template<class hi_bit_vector_type = bit_vector,
00042          class Select1Support     = typename hi_bit_vector_type::select_1_type,
00043          class Select0Support     = typename hi_bit_vector_type::select_0_type>
00044 class sd_select_support;  // in sd_vector
00045 
00047 // representing the positions of 1 by the Elias-Fano representation for non-decreasing sequences
00065 template<class hi_bit_vector_type       = bit_vector,
00066            class hi_select_1                    = typename hi_bit_vector_type::select_1_type,
00067            class hi_select_0                    = typename hi_bit_vector_type::select_0_type>
00068 class sd_vector
00069 {
00070     public:
00071         typedef bit_vector::size_type size_type;
00072         typedef size_type value_type;
00073         typedef hi_select_0 select_0_support_type;
00074         typedef hi_select_1 select_1_support_type;
00075 
00076         friend class sd_rank_support<hi_bit_vector_type, select_1_support_type, select_0_support_type>;
00077         friend class sd_select_support<hi_bit_vector_type, select_1_support_type, select_0_support_type>;
00078 
00079         typedef sd_rank_support<hi_bit_vector_type, select_1_support_type, select_0_support_type> rank_1_type;
00080         typedef sd_select_support<hi_bit_vector_type, select_1_support_type, select_0_support_type> select_1_type;
00081     private:
00082         // we need this variables to represent the m ones of the original bit vector of size n
00083         size_type m_size;                // length of the original bit vector
00084         uint8_t   m_wl;                  // log n - log m, where n is the length of the original bit vector
00085         // and m is the number of ones in the bit vector, wl is the abbreviation
00086         // for ,,width (of) low (part)''
00087 
00088         int_vector<>                    m_low;           // vector for the least significant bits of the positions of the m ones
00089         hi_bit_vector_type      m_high;          // bit vector that represents the most significant bit in permuted order
00090         select_1_support_type   m_high_1_select; // select support for the ones in m_high
00091         select_0_support_type   m_high_0_select; // select support for the zeros in m_high
00092 
00093         void copy(const sd_vector& v) {
00094             m_size = v.m_size;
00095             m_wl   = v.m_wl;
00096             m_low  = v.m_low;
00097             m_high = v.m_high;
00098             m_high_1_select = v.m_high_1_select;
00099             m_high_1_select.set_vector(&m_high);
00100             m_high_0_select = v.m_high_0_select;
00101             m_high_0_select.set_vector(&m_high);
00102         }
00103 
00104     public:
00105         const hi_bit_vector_type& high;
00106         const int_vector<>& low;
00107         const select_1_support_type&     high_1_select;
00108         const select_0_support_type&     high_0_select;
00109 
00110         sd_vector():m_size(0), m_wl(0),
00111             high(m_high), low(m_low),
00112             high_1_select(m_high_1_select), high_0_select(m_high_0_select) {
00113         }
00114 
00115         sd_vector(const bit_vector& bv):high(m_high),low(m_low),
00116             high_1_select(m_high_1_select), high_0_select(m_high_0_select) {
00117             m_size = bv.size();
00118             size_type m = util::get_one_bits(bv);
00119             uint8_t logm = bit_magic::l1BP(m)+1;
00120             uint8_t logn = bit_magic::l1BP(m_size)+1;
00121             if (logm == logn) {
00122                 --logm;    // to ensure logn-logm > 0
00123             }
00124             m_wl    = logn - logm;
00125             m_low = int_vector<>(m, 0, m_wl);
00126             bit_vector high = bit_vector(m + (1ULL<<logm), 0); //
00127             const uint64_t* bvp = bv.data();
00128             for (size_type i=0, mm=0,last_high=0,highpos=0; i < (bv.size()+63)/64; ++i, ++bvp) {
00129                 size_type position = 64*i;
00130                 uint64_t  w = *bvp;
00131                 while (w) {  // process bit_vector word by word
00132                     uint8_t offset = bit_magic::r1BP(w);
00133                     w >>= offset;   // note:  w >>= (offset+1) can not be applied for offset=63!
00134                     position += offset;
00135                     if (position >= bv.size()) // check that we have not reached the end of the bitvector
00136                         break;
00137                     // (1) handle high part
00138                     size_type cur_high = position >> m_wl;
00139                     highpos += (cur_high - last_high);   // write cur_high-last_high 0s
00140                     last_high = cur_high;
00141                     // (2) handle low part
00142                     m_low[mm++] = position; // int_vector truncates the most significant logm bits
00143                     high[highpos++] = 1;     // write 1 for the entry
00144                     position += 1;
00145                     w >>= 1;
00146                 }
00147             }
00148             util::assign(m_high, high);
00149             util::init_support(m_high_1_select, &m_high);
00150             util::init_support(m_high_0_select, &m_high);
00151         }
00152 
00154 
00163         value_type operator[](size_type i)const {
00164             size_type high_val = (i >> (m_wl));
00165             size_type sel_high = m_high_0_select.select(high_val + 1);
00166             size_type rank_low = sel_high - high_val;
00167             if (0 == rank_low)
00168                 return 0;
00169             size_type val_low = i & bit_magic::Li1Mask[ m_wl ]; // extract the low m_wl = log n -log m bits
00170             --sel_high; --rank_low;
00171             while (m_high[sel_high] and m_low[rank_low] > val_low) {
00172                 if (sel_high > 0) {
00173                     --sel_high; --rank_low;
00174                 } else
00175                     return 0;
00176             }
00177             return m_high[sel_high] and m_low[rank_low] == val_low;
00178         }
00179 
00181         void swap(sd_vector& v) {
00182             if (this != &v) {
00183                 std::swap(m_size, v.m_size);
00184                 std::swap(m_wl, v.m_wl);
00185                 m_low.swap(v.m_low);
00186                 m_high.swap(v.m_high);
00187                 util::swap_support(m_high_1_select, v.m_high_1_select, &m_high, &v.m_high);
00188                 util::swap_support(m_high_0_select, v.m_high_0_select, &m_high, &v.m_high);
00189             }
00190         }
00191 
00193         size_type size()const {
00194             return m_size;
00195         }
00196 
00197         sd_vector& operator=(const sd_vector& v) {
00198             if (this != &v) {
00199                 copy(v);
00200             }
00201             return *this;
00202         }
00203 
00205         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00206             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00207             size_type written_bytes = 0;
00208             written_bytes += util::write_member(m_size, out, child, "size");
00209             written_bytes += util::write_member(m_wl, out, child, "wl");
00210             written_bytes += m_low.serialize(out, child, "low");
00211             written_bytes += m_high.serialize(out, child, "high");
00212             written_bytes += m_high_1_select.serialize(out, child, "high_1_select");
00213             written_bytes += m_high_0_select.serialize(out, child, "high_0_select");
00214             structure_tree::add_size(child, written_bytes);
00215             return written_bytes;
00216         }
00217 
00219         void load(std::istream& in) {
00220             util::read_member(m_size, in);
00221             util::read_member(m_wl, in);
00222             m_low.load(in);
00223             m_high.load(in);
00224             m_high_1_select.load(in, &m_high);
00225             m_high_0_select.load(in, &m_high);
00226         }
00227 };
00228 
00230 /*
00231  *      \tparam hi_bit_vector_type      Type of the bitvector HI used for representing the high part of the positions of the 1s in sd_vector.
00232  *  \tparam hi_select_1                 Type of the select support data structure which is used to select ones in HI in sd_vector.
00233  *  \tparam hi_select_0                 Type of the select support data structure which is used to select zeros in HI in sd_vector.
00234  */
00235 template<class hi_bit_vector_type, class Select1Support, class Select0Support>
00236 class sd_rank_support
00237 {
00238     public:
00239         typedef bit_vector::size_type size_type;
00240         typedef sd_vector<hi_bit_vector_type, Select1Support, Select0Support> bit_vector_type;
00241     private:
00242         const bit_vector_type* m_v;
00243 
00244     public:
00245 
00246         explicit sd_rank_support(const bit_vector_type* v=NULL) {
00247             init(v);
00248         }
00249 
00250         void init(const bit_vector_type* v=NULL) {
00251             set_vector(v);
00252         }
00253 
00254         size_type rank(size_type i)const {
00255             // split problem in two parts:
00256             // (1) find  >=
00257             size_type high_val = (i >> (m_v->m_wl));
00258             size_type sel_high = m_v->m_high_0_select.select(high_val + 1);
00259             size_type rank_low = sel_high - high_val; //
00260             if (0 == rank_low)
00261                 return 0;
00262             size_type val_low = i & bit_magic::Li1Mask[ m_v->m_wl ];
00263             // now since rank_low > 0 => sel_high > 0
00264             do {
00265                 if (!sel_high)
00266                     return 0;
00267                 --sel_high; --rank_low;
00268             } while (m_v->m_high[sel_high] and m_v->m_low[rank_low] >= val_low);
00269             return rank_low+1;
00270         }
00271 
00272         const size_type operator()(size_type i)const {
00273             return rank(i);
00274         }
00275 
00276         const size_type size()const {
00277             return m_v->size();
00278         }
00279 
00280         void set_vector(const bit_vector_type* v=NULL) {
00281             m_v = v;
00282         }
00283 
00284         sd_rank_support& operator=(const sd_rank_support& rs) {
00285             if (this != &rs) {
00286                 set_vector(rs.m_v);
00287             }
00288             return *this;
00289         }
00290 
00291         void swap(sd_rank_support&) { }
00292 
00293         bool operator==(const sd_rank_support& ss)const {
00294             if (this == &ss)
00295                 return true;
00296             return ss.m_v == m_v;
00297         }
00298 
00299         bool operator!=(const sd_rank_support& rs)const {
00300             return !(*this == rs);
00301         }
00302 
00303 
00304         void load(std::istream& in, const bit_vector_type* v=NULL) {
00305             set_vector(v);
00306         }
00307 
00308         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00309             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00310             size_type written_bytes = 0;
00311             structure_tree::add_size(child, written_bytes);
00312             return written_bytes;
00313         }
00314 };
00315 
00317 /*
00318  *      \tparam hi_bit_vector_type      Type of the bitvector HI used for representing the high part of the positions of the 1s in sd_vector.
00319  *  \tparam hi_select_1                 Type of the select support data structure which is used to select ones in HI in sd_vector.
00320  *  \tparam hi_select_0                 Type of the select support data structure which is used to select zeros in HI in sd_vector.
00321  */
00322 template<class hi_bit_vector_type, class Select1Support, class Select0Support>
00323 class sd_select_support
00324 {
00325     public:
00326         typedef bit_vector::size_type size_type;
00327         typedef sd_vector<hi_bit_vector_type, Select1Support, Select0Support> bit_vector_type;
00328     private:
00329         const bit_vector_type* m_v;
00330 
00331     public:
00332 
00333         explicit sd_select_support(const bit_vector_type* v=NULL) {
00334             init(v);
00335         }
00336 
00337         void init(const bit_vector_type* v=NULL) {
00338             set_vector(v);
00339         }
00340 
00342         size_type select(size_type i)const {
00343             return m_v->m_low[i-1] +  // lower part of the number
00344                    ((m_v->m_high_1_select(i) + 1 - i)  << (m_v->m_wl));  // upper part
00345             //^-number of 0 before the i-th 1-^      ^-shift by m_wl
00346         }
00347 
00348         const size_type operator()(size_type i)const {
00349             return select(i);
00350         }
00351 
00352         const size_type size()const {
00353             return m_v->size();
00354         }
00355 
00356         void set_vector(const bit_vector_type* v=NULL) {
00357             m_v = v;
00358         }
00359 
00360         sd_select_support& operator=(const sd_select_support& rs) {
00361             if (this != &rs) {
00362                 set_vector(rs.m_v);
00363             }
00364             return *this;
00365         }
00366 
00367         void swap(sd_select_support&) { }
00368 
00369         bool operator==(const sd_select_support& ss)const {
00370             if (this == &ss)
00371                 return true;
00372             return ss.m_v == m_v;
00373         }
00374 
00375         bool operator!=(const sd_select_support& rs)const {
00376             return !(*this == rs);
00377         }
00378 
00379 
00380         void load(std::istream& in, const bit_vector_type* v=NULL) {
00381             set_vector(v);
00382         }
00383 
00384         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00385             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00386             size_type written_bytes = 0;
00387             structure_tree::add_size(child, written_bytes);
00388             return written_bytes;
00389         }
00390 };
00391 
00392 } // end namespace
00393 
00394 #endif