SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2012 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00022 #ifndef INCLUDED_SDSL_SD_VECTOR 00023 #define INCLUDED_SDSL_SD_VECTOR 00024 00025 #include "int_vector.hpp" 00026 #include "select_support_mcl.hpp" 00027 #include "util.hpp" 00028 #include "bitmagic.hpp" 00029 00031 namespace sdsl 00032 { 00033 00034 // forward declaration needed for friend declaration 00035 template<class hi_bit_vector_type = bit_vector, 00036 class Select1Support = typename hi_bit_vector_type::select_1_type, 00037 class Select0Support = typename hi_bit_vector_type::select_0_type> 00038 class sd_rank_support; // in sd_vector 00039 00040 // forward declaration needed for friend declaration 00041 template<class hi_bit_vector_type = bit_vector, 00042 class Select1Support = typename hi_bit_vector_type::select_1_type, 00043 class Select0Support = typename hi_bit_vector_type::select_0_type> 00044 class sd_select_support; // in sd_vector 00045 00047 // representing the positions of 1 by the Elias-Fano representation for non-decreasing sequences 00065 template<class hi_bit_vector_type = bit_vector, 00066 class hi_select_1 = typename hi_bit_vector_type::select_1_type, 00067 class hi_select_0 = typename hi_bit_vector_type::select_0_type> 00068 class sd_vector 00069 { 00070 public: 00071 typedef bit_vector::size_type size_type; 00072 typedef size_type value_type; 00073 typedef hi_select_0 select_0_support_type; 00074 typedef hi_select_1 select_1_support_type; 00075 00076 friend class sd_rank_support<hi_bit_vector_type, select_1_support_type, select_0_support_type>; 00077 friend class sd_select_support<hi_bit_vector_type, select_1_support_type, select_0_support_type>; 00078 00079 typedef sd_rank_support<hi_bit_vector_type, select_1_support_type, select_0_support_type> rank_1_type; 00080 typedef sd_select_support<hi_bit_vector_type, select_1_support_type, select_0_support_type> select_1_type; 00081 private: 00082 // we need this variables to represent the m ones of the original bit vector of size n 00083 size_type m_size; // length of the original bit vector 00084 uint8_t m_wl; // log n - log m, where n is the length of the original bit vector 00085 // and m is the number of ones in the bit vector, wl is the abbreviation 00086 // for ,,width (of) low (part)'' 00087 00088 int_vector<> m_low; // vector for the least significant bits of the positions of the m ones 00089 hi_bit_vector_type m_high; // bit vector that represents the most significant bit in permuted order 00090 select_1_support_type m_high_1_select; // select support for the ones in m_high 00091 select_0_support_type m_high_0_select; // select support for the zeros in m_high 00092 00093 void copy(const sd_vector& v) { 00094 m_size = v.m_size; 00095 m_wl = v.m_wl; 00096 m_low = v.m_low; 00097 m_high = v.m_high; 00098 m_high_1_select = v.m_high_1_select; 00099 m_high_1_select.set_vector(&m_high); 00100 m_high_0_select = v.m_high_0_select; 00101 m_high_0_select.set_vector(&m_high); 00102 } 00103 00104 public: 00105 const hi_bit_vector_type& high; 00106 const int_vector<>& low; 00107 const select_1_support_type& high_1_select; 00108 const select_0_support_type& high_0_select; 00109 00110 sd_vector():m_size(0), m_wl(0), 00111 high(m_high), low(m_low), 00112 high_1_select(m_high_1_select), high_0_select(m_high_0_select) { 00113 } 00114 00115 sd_vector(const bit_vector& bv):high(m_high),low(m_low), 00116 high_1_select(m_high_1_select), high_0_select(m_high_0_select) { 00117 m_size = bv.size(); 00118 size_type m = util::get_one_bits(bv); 00119 uint8_t logm = bit_magic::l1BP(m)+1; 00120 uint8_t logn = bit_magic::l1BP(m_size)+1; 00121 if (logm == logn) { 00122 --logm; // to ensure logn-logm > 0 00123 } 00124 m_wl = logn - logm; 00125 m_low = int_vector<>(m, 0, m_wl); 00126 bit_vector high = bit_vector(m + (1ULL<<logm), 0); // 00127 const uint64_t* bvp = bv.data(); 00128 for (size_type i=0, mm=0,last_high=0,highpos=0; i < (bv.size()+63)/64; ++i, ++bvp) { 00129 size_type position = 64*i; 00130 uint64_t w = *bvp; 00131 while (w) { // process bit_vector word by word 00132 uint8_t offset = bit_magic::r1BP(w); 00133 w >>= offset; // note: w >>= (offset+1) can not be applied for offset=63! 00134 position += offset; 00135 if (position >= bv.size()) // check that we have not reached the end of the bitvector 00136 break; 00137 // (1) handle high part 00138 size_type cur_high = position >> m_wl; 00139 highpos += (cur_high - last_high); // write cur_high-last_high 0s 00140 last_high = cur_high; 00141 // (2) handle low part 00142 m_low[mm++] = position; // int_vector truncates the most significant logm bits 00143 high[highpos++] = 1; // write 1 for the entry 00144 position += 1; 00145 w >>= 1; 00146 } 00147 } 00148 util::assign(m_high, high); 00149 util::init_support(m_high_1_select, &m_high); 00150 util::init_support(m_high_0_select, &m_high); 00151 } 00152 00154 00163 value_type operator[](size_type i)const { 00164 size_type high_val = (i >> (m_wl)); 00165 size_type sel_high = m_high_0_select.select(high_val + 1); 00166 size_type rank_low = sel_high - high_val; 00167 if (0 == rank_low) 00168 return 0; 00169 size_type val_low = i & bit_magic::Li1Mask[ m_wl ]; // extract the low m_wl = log n -log m bits 00170 --sel_high; --rank_low; 00171 while (m_high[sel_high] and m_low[rank_low] > val_low) { 00172 if (sel_high > 0) { 00173 --sel_high; --rank_low; 00174 } else 00175 return 0; 00176 } 00177 return m_high[sel_high] and m_low[rank_low] == val_low; 00178 } 00179 00181 void swap(sd_vector& v) { 00182 if (this != &v) { 00183 std::swap(m_size, v.m_size); 00184 std::swap(m_wl, v.m_wl); 00185 m_low.swap(v.m_low); 00186 m_high.swap(v.m_high); 00187 util::swap_support(m_high_1_select, v.m_high_1_select, &m_high, &v.m_high); 00188 util::swap_support(m_high_0_select, v.m_high_0_select, &m_high, &v.m_high); 00189 } 00190 } 00191 00193 size_type size()const { 00194 return m_size; 00195 } 00196 00197 sd_vector& operator=(const sd_vector& v) { 00198 if (this != &v) { 00199 copy(v); 00200 } 00201 return *this; 00202 } 00203 00205 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00206 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00207 size_type written_bytes = 0; 00208 written_bytes += util::write_member(m_size, out, child, "size"); 00209 written_bytes += util::write_member(m_wl, out, child, "wl"); 00210 written_bytes += m_low.serialize(out, child, "low"); 00211 written_bytes += m_high.serialize(out, child, "high"); 00212 written_bytes += m_high_1_select.serialize(out, child, "high_1_select"); 00213 written_bytes += m_high_0_select.serialize(out, child, "high_0_select"); 00214 structure_tree::add_size(child, written_bytes); 00215 return written_bytes; 00216 } 00217 00219 void load(std::istream& in) { 00220 util::read_member(m_size, in); 00221 util::read_member(m_wl, in); 00222 m_low.load(in); 00223 m_high.load(in); 00224 m_high_1_select.load(in, &m_high); 00225 m_high_0_select.load(in, &m_high); 00226 } 00227 }; 00228 00230 /* 00231 * \tparam hi_bit_vector_type Type of the bitvector HI used for representing the high part of the positions of the 1s in sd_vector. 00232 * \tparam hi_select_1 Type of the select support data structure which is used to select ones in HI in sd_vector. 00233 * \tparam hi_select_0 Type of the select support data structure which is used to select zeros in HI in sd_vector. 00234 */ 00235 template<class hi_bit_vector_type, class Select1Support, class Select0Support> 00236 class sd_rank_support 00237 { 00238 public: 00239 typedef bit_vector::size_type size_type; 00240 typedef sd_vector<hi_bit_vector_type, Select1Support, Select0Support> bit_vector_type; 00241 private: 00242 const bit_vector_type* m_v; 00243 00244 public: 00245 00246 explicit sd_rank_support(const bit_vector_type* v=NULL) { 00247 init(v); 00248 } 00249 00250 void init(const bit_vector_type* v=NULL) { 00251 set_vector(v); 00252 } 00253 00254 size_type rank(size_type i)const { 00255 // split problem in two parts: 00256 // (1) find >= 00257 size_type high_val = (i >> (m_v->m_wl)); 00258 size_type sel_high = m_v->m_high_0_select.select(high_val + 1); 00259 size_type rank_low = sel_high - high_val; // 00260 if (0 == rank_low) 00261 return 0; 00262 size_type val_low = i & bit_magic::Li1Mask[ m_v->m_wl ]; 00263 // now since rank_low > 0 => sel_high > 0 00264 do { 00265 if (!sel_high) 00266 return 0; 00267 --sel_high; --rank_low; 00268 } while (m_v->m_high[sel_high] and m_v->m_low[rank_low] >= val_low); 00269 return rank_low+1; 00270 } 00271 00272 const size_type operator()(size_type i)const { 00273 return rank(i); 00274 } 00275 00276 const size_type size()const { 00277 return m_v->size(); 00278 } 00279 00280 void set_vector(const bit_vector_type* v=NULL) { 00281 m_v = v; 00282 } 00283 00284 sd_rank_support& operator=(const sd_rank_support& rs) { 00285 if (this != &rs) { 00286 set_vector(rs.m_v); 00287 } 00288 return *this; 00289 } 00290 00291 void swap(sd_rank_support&) { } 00292 00293 bool operator==(const sd_rank_support& ss)const { 00294 if (this == &ss) 00295 return true; 00296 return ss.m_v == m_v; 00297 } 00298 00299 bool operator!=(const sd_rank_support& rs)const { 00300 return !(*this == rs); 00301 } 00302 00303 00304 void load(std::istream& in, const bit_vector_type* v=NULL) { 00305 set_vector(v); 00306 } 00307 00308 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00309 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00310 size_type written_bytes = 0; 00311 structure_tree::add_size(child, written_bytes); 00312 return written_bytes; 00313 } 00314 }; 00315 00317 /* 00318 * \tparam hi_bit_vector_type Type of the bitvector HI used for representing the high part of the positions of the 1s in sd_vector. 00319 * \tparam hi_select_1 Type of the select support data structure which is used to select ones in HI in sd_vector. 00320 * \tparam hi_select_0 Type of the select support data structure which is used to select zeros in HI in sd_vector. 00321 */ 00322 template<class hi_bit_vector_type, class Select1Support, class Select0Support> 00323 class sd_select_support 00324 { 00325 public: 00326 typedef bit_vector::size_type size_type; 00327 typedef sd_vector<hi_bit_vector_type, Select1Support, Select0Support> bit_vector_type; 00328 private: 00329 const bit_vector_type* m_v; 00330 00331 public: 00332 00333 explicit sd_select_support(const bit_vector_type* v=NULL) { 00334 init(v); 00335 } 00336 00337 void init(const bit_vector_type* v=NULL) { 00338 set_vector(v); 00339 } 00340 00342 size_type select(size_type i)const { 00343 return m_v->m_low[i-1] + // lower part of the number 00344 ((m_v->m_high_1_select(i) + 1 - i) << (m_v->m_wl)); // upper part 00345 //^-number of 0 before the i-th 1-^ ^-shift by m_wl 00346 } 00347 00348 const size_type operator()(size_type i)const { 00349 return select(i); 00350 } 00351 00352 const size_type size()const { 00353 return m_v->size(); 00354 } 00355 00356 void set_vector(const bit_vector_type* v=NULL) { 00357 m_v = v; 00358 } 00359 00360 sd_select_support& operator=(const sd_select_support& rs) { 00361 if (this != &rs) { 00362 set_vector(rs.m_v); 00363 } 00364 return *this; 00365 } 00366 00367 void swap(sd_select_support&) { } 00368 00369 bool operator==(const sd_select_support& ss)const { 00370 if (this == &ss) 00371 return true; 00372 return ss.m_v == m_v; 00373 } 00374 00375 bool operator!=(const sd_select_support& rs)const { 00376 return !(*this == rs); 00377 } 00378 00379 00380 void load(std::istream& in, const bit_vector_type* v=NULL) { 00381 set_vector(v); 00382 } 00383 00384 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00385 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00386 size_type written_bytes = 0; 00387 structure_tree::add_size(child, written_bytes); 00388 return written_bytes; 00389 } 00390 }; 00391 00392 } // end namespace 00393 00394 #endif