SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2012 Simon Gog, Matthias Petri 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00022 #ifndef SDSL_BIT_VECTOR_INTERLEAVED 00023 #define SDSL_BIT_VECTOR_INTERLEAVED 00024 00025 #include "int_vector.hpp" 00026 #include "bitmagic.hpp" 00027 #include "util.hpp" 00028 00029 #include <queue> 00030 00032 namespace sdsl 00033 { 00034 00035 template<uint8_t b=1,uint32_t blockSize=512>// forward declaration needed for friend declaration 00036 class rank_support_interleaved; // in bit_vector_interleaved 00037 00038 template<uint8_t b=1,uint32_t blockSize=512>// forward declaration needed for friend declaration 00039 class select_support_interleaved; // in bit_vector_interleaved 00040 00042 00049 template<uint32_t blockSize=512> 00050 class bit_vector_interleaved 00051 { 00052 public: 00053 typedef bit_vector::size_type size_type; 00054 typedef size_type value_type; 00055 00056 friend class rank_support_interleaved<1,blockSize>; 00057 friend class rank_support_interleaved<0,blockSize>; 00058 friend class select_support_interleaved<1,blockSize>; 00059 friend class select_support_interleaved<0,blockSize>; 00060 00061 typedef rank_support_interleaved<1,blockSize> rank_1_type; 00062 typedef rank_support_interleaved<0,blockSize> rank_0_type; 00063 typedef select_support_interleaved<1,blockSize> select_1_type; 00064 typedef select_support_interleaved<0,blockSize> select_0_type; 00065 private: 00066 size_type m_size; /* size of the original bit vector */ 00067 size_type m_totalBlocks; /* total size of m_data in u64s */ 00068 size_type m_superblocks; /* number of superblocks */ 00069 size_type m_blockShift; 00070 int_vector<64> m_data; /* data */ 00071 int_vector<64> m_rank_samples; /* space for additional rank samples */ 00072 00073 // precondition: m_rank_samples.size() <= m_superblocks 00074 void init_rank_samples() { 00075 uint32_t blockSize_U64 = bit_magic::l1BP(blockSize>>6); 00076 size_type idx = 0; 00077 std::queue<size_type> lbs, rbs; 00078 lbs.push(0); rbs.push(m_superblocks); 00079 while (!lbs.empty()) { 00080 size_type lb = lbs.front(); lbs.pop(); 00081 size_type rb = rbs.front(); rbs.pop(); 00082 if (/*lb < rb and*/ idx < m_rank_samples.size()) { 00083 size_type mid = lb + (rb-lb)/2; // select mid \in [lb..rb) 00084 size_type pos = (mid << blockSize_U64) + mid; 00085 m_rank_samples[ idx++ ] = m_data[pos]; 00086 lbs.push(lb); rbs.push(mid); 00087 lbs.push(mid+1); rbs.push(rb); 00088 } 00089 } 00090 } 00091 00092 public: 00093 bit_vector_interleaved():m_size(0), m_totalBlocks(0), m_superblocks(0), m_blockShift(0) {} 00094 00095 bit_vector_interleaved(const bit_vector& bv):m_size(0), m_totalBlocks(0), m_superblocks(0), m_blockShift(0) { 00096 m_size = bv.size(); 00097 /* calculate the number of superblocks */ 00098 // each block of size > 0 gets suberblock in which we store the cumulative sum up to this block 00099 m_superblocks = (m_size+blockSize) / blockSize; 00100 m_blockShift = bit_magic::l1BP(blockSize); 00101 /* allocate new data */ 00102 size_type blocks = (m_size+64)/64; 00103 size_type mem = blocks + m_superblocks + 1; 00104 // ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^ 00105 // bit vector data | cum. sum data | sum after last block 00106 util::assign(m_data, int_vector<64>(mem)); 00107 m_totalBlocks = mem; 00108 00109 /* assign data and calculate super block values */ 00110 const uint64_t* bvp = bv.data(); 00111 00112 size_type j = 0; // 64-bit word counter in the m_data 00113 size_type cum_sum = 0; 00114 size_type sample_rate = blockSize/64; 00115 for (size_type i=0, sample_cnt=sample_rate; i < blocks; ++i, ++sample_cnt) { 00116 if (sample_cnt == sample_rate) { 00117 m_data[j] = cum_sum; 00118 sample_cnt = 0; 00119 j++; 00120 } 00121 m_data[j] = bvp[i]; 00122 cum_sum += bit_magic::b1Cnt(m_data[j]); 00123 j++; 00124 } 00125 m_data[j] = cum_sum; /* last superblock so we can always 00126 get num_ones fast */ 00127 if (m_totalBlocks > 1024*64) { 00128 // we store at most m_superblocks+1 rank_samples: 00129 // we do a cache efficient binary search for the select on X=1024 00130 // or X=the smallest power of two smaller than m_superblock 00131 m_rank_samples.resize(std::min(1024ULL, 1ULL << bit_magic::l1BP(m_superblocks))); 00132 } 00133 init_rank_samples(); 00134 } 00135 00137 00142 value_type operator[](size_type i)const { 00143 size_type bs = i >> m_blockShift; 00144 size_type block = bs + (i>>6) + 1; 00145 return ((m_data[block] >> (i&63)) & 1ULL); 00146 } 00147 00149 size_type size()const { 00150 return m_size; 00151 } 00152 00154 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00155 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00156 size_type written_bytes = 0; 00157 written_bytes += util::write_member(m_size, out, child, "size"); 00158 written_bytes += util::write_member(m_totalBlocks, out, child, "totalBlocks"); 00159 written_bytes += util::write_member(m_superblocks, out, child, "superblocks"); 00160 written_bytes += util::write_member(m_blockShift, out, child, "blockShift"); 00161 written_bytes += m_data.serialize(out, child, "data"); 00162 written_bytes += m_rank_samples.serialize(out, child, "rank_samples"); 00163 structure_tree::add_size(child, written_bytes); 00164 return written_bytes; 00165 } 00166 00168 void load(std::istream& in) { 00169 util::read_member(m_size, in); 00170 util::read_member(m_totalBlocks, in); 00171 util::read_member(m_superblocks, in); 00172 util::read_member(m_blockShift, in); 00173 m_data.load(in); 00174 m_rank_samples.load(in); 00175 } 00176 00177 void swap(bit_vector_interleaved& bv) { 00178 if (this != &bv) { 00179 std::swap(m_size, bv.m_size); 00180 std::swap(m_totalBlocks, bv.m_totalBlocks); 00181 std::swap(m_superblocks, bv.m_superblocks); 00182 std::swap(m_blockShift, bv.m_blockShift); 00183 m_data.swap(bv.m_data); 00184 m_rank_samples.swap(bv.m_rank_samples); 00185 } 00186 } 00187 }; 00188 00189 template<uint8_t b,uint32_t blockSize> 00190 class rank_support_interleaved 00191 { 00192 public: 00193 typedef bit_vector::size_type size_type; 00194 typedef bit_vector_interleaved<blockSize> bit_vector_type; 00195 private: 00196 const bit_vector_type* m_v; 00197 size_type m_blockShift; 00198 size_type m_blockMask; 00199 size_type m_blockSize_U64; /* blocksize for superblocks in 64 bit words */ 00200 00201 inline size_type rank1(size_type i) const { 00202 size_type SBlockNum = i >> m_blockShift; 00203 size_type SBlockPos = (SBlockNum << m_blockSize_U64) + SBlockNum; 00204 uint64_t resp = m_v->m_data[SBlockPos]; 00205 const uint64_t* B = (m_v->m_data.data() + (SBlockPos+1)); 00206 uint64_t rem = i&63; 00207 uint64_t bits = (i&m_blockMask) - rem; 00208 while (bits) { 00209 resp += bit_magic::b1Cnt(*B++); 00210 bits -= 64; 00211 } 00212 resp += bit_magic::b1Cnt(*B & bit_magic::Li1Mask[rem]); 00213 return resp; 00214 } 00215 00216 00217 inline size_type rank0(size_type i) const { 00218 size_type SBlockNum = i >> m_blockShift; 00219 size_type SBlockPos = (SBlockNum << m_blockSize_U64) + SBlockNum; 00220 uint64_t resp = (SBlockNum << m_blockShift) - m_v->m_data[SBlockPos]; 00221 const uint64_t* B = (m_v->m_data.data() + (SBlockPos+1)); 00222 uint64_t rem = i&63; 00223 uint64_t bits = (i&m_blockMask) - rem; 00224 while (bits) { 00225 resp += bit_magic::b1Cnt(~(*B)); B++; 00226 bits -= 64; 00227 } 00228 resp += bit_magic::b1Cnt((~(*B)) & bit_magic::Li1Mask[rem]); 00229 return resp; 00230 } 00231 00232 public: 00233 00234 rank_support_interleaved(const bit_vector_type* v=NULL) { 00235 init(v); 00236 m_blockShift = bit_magic::l1BP(blockSize); 00237 m_blockMask = blockSize - 1; 00238 m_blockSize_U64 = bit_magic::l1BP(blockSize>>6); 00239 } 00240 00241 void init(const bit_vector_type* v=NULL) { 00242 set_vector(v); 00243 } 00244 00246 size_type rank(size_type i) const { 00247 if (b) return rank1(i); 00248 return rank0(i); 00249 } 00250 00251 const size_type operator()(size_type i)const { 00252 return rank(i); 00253 } 00254 00255 const size_type size()const { 00256 return m_v->size(); 00257 } 00258 00259 void set_vector(const bit_vector_type* v=NULL) { 00260 m_v = v; 00261 } 00262 00263 rank_support_interleaved& operator=(const rank_support_interleaved& rs) { 00264 if (this != &rs) { 00265 set_vector(rs.m_v); 00266 } 00267 return *this; 00268 } 00269 00270 void swap(rank_support_interleaved& rs) { } 00271 00272 bool operator==(const rank_support_interleaved& ss)const { 00273 if (this == &ss) 00274 return true; 00275 return ss.m_v == m_v; 00276 } 00277 00278 bool operator!=(const rank_support_interleaved& rs)const { 00279 return !(*this == rs); 00280 } 00281 00282 00283 void load(std::istream& in, const bit_vector_type* v=NULL) { 00284 set_vector(v); 00285 } 00286 00287 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00288 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00289 size_type written_bytes = 0; 00290 structure_tree::add_size(child, written_bytes); 00291 return written_bytes; 00292 } 00293 }; 00294 00295 00296 template<uint8_t b,uint32_t blockSize> 00297 class select_support_interleaved 00298 { 00299 public: 00300 typedef bit_vector::size_type size_type; 00301 typedef bit_vector_interleaved<blockSize> bit_vector_type; 00302 private: 00303 const bit_vector_type* m_v; 00304 size_type m_superblocks; /* number of superblocks */ 00305 size_type m_blockShift; 00306 size_type m_blockSize_U64; 00307 00308 00310 size_type select1(size_type i) const { 00311 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb) 00312 size_type res = 0; 00313 size_type idx = 0; // index in m_rank_samples 00314 /* binary search over super blocks */ 00315 // invariant: lb==0 or m_data[ pos(lb-1) ] < i 00316 // m_data[ pos(rb) ] >= i, initial since i < rank(size()) 00317 while (lb < rb) { 00318 size_type mid = (lb+rb)/2; // select mid \in [lb..rb) 00319 if (idx < m_v->m_rank_samples.size()) { 00320 if (m_v->m_rank_samples[idx] >= i) { 00321 idx = (idx<<1) + 1; 00322 rb = mid; 00323 } else { 00324 idx = (idx<<1) + 2; 00325 lb = mid + 1; 00326 } 00327 } else { 00328 size_type pos = (mid << m_blockSize_U64) + mid; 00329 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^ 00330 // data blocks to jump superblock position 00331 if (m_v->m_data[pos] >= i) { 00332 rb = mid; 00333 } else { 00334 lb = mid + 1; 00335 } 00336 } 00337 } 00338 res = (rb-1) << m_blockShift; 00339 /* iterate in 64 bit steps */ 00340 const uint64_t* w = m_v->m_data.data() + ((rb-1) << m_blockSize_U64) + (rb-1); 00341 i -= *w; // subtract the cumulative sum before the superblock 00342 ++w; /* step into the data */ 00343 size_type ones = bit_magic::b1Cnt(*w); 00344 while (ones < i) { 00345 i -= ones; ++w; 00346 ones = bit_magic::b1Cnt(*w); 00347 res += 64; 00348 } 00349 /* handle last word */ 00350 res += bit_magic::i1BP(*w, i); 00351 return res; 00352 } 00353 00355 size_type select0(size_type i)const { 00356 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb) 00357 size_type res = 0; 00358 size_type idx = 0; // index in m_rank_samples 00359 /* binary search over super blocks */ 00360 // invariant: lb==0 or m_data[ pos(lb-1) ] < i 00361 // m_data[ pos(rb) ] >= i, initial since i < rank(size()) 00362 while (lb < rb) { 00363 size_type mid = (lb+rb)/2; // select mid \in [lb..rb) 00364 if (idx < m_v->m_rank_samples.size()) { 00365 if (((mid << m_blockShift) - m_v->m_rank_samples[idx]) >= i) { 00366 idx = (idx<<1) + 1; 00367 rb = mid; 00368 } else { 00369 idx = (idx<<1) + 2; 00370 lb = mid + 1; 00371 } 00372 } else { 00373 size_type pos = (mid << m_blockSize_U64) + mid; 00374 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^ 00375 // data blocks to jump superblock position 00376 if (((mid << m_blockShift) - m_v->m_data[pos]) >= i) { 00377 rb = mid; 00378 } else { 00379 lb = mid + 1; 00380 } 00381 } 00382 } 00383 res = (rb-1) << m_blockShift; 00384 00385 /* iterate in 64 bit steps */ 00386 const uint64_t* w = m_v->m_data.data() + ((rb-1) << m_blockSize_U64) + (rb-1); 00387 i = i - (res - *w); // substract the cumulative sum before the superblock 00388 ++w; /* step into the data */ 00389 size_type zeros = bit_magic::b1Cnt(~ *w); 00390 while (zeros < i) { 00391 i -= zeros; ++w; 00392 zeros = bit_magic::b1Cnt(~ *w); 00393 res += 64; 00394 } 00395 /* handle last word */ 00396 res += bit_magic::i1BP(~ *w, i); 00397 return res; 00398 } 00399 00400 public: 00401 00402 select_support_interleaved(const bit_vector_type* v=NULL) { 00403 init(v); 00404 } 00405 00406 00407 void init(const bit_vector_type* v=NULL) { 00408 set_vector(v); 00409 m_blockShift = bit_magic::l1BP(blockSize); 00410 m_blockSize_U64 = bit_magic::l1BP(blockSize>>6); 00411 } 00412 00414 size_type select(size_type i) const { 00415 if (b) return select1(i); 00416 return select0(i); 00417 } 00418 00419 const size_type operator()(size_type i)const { 00420 return select(i); 00421 } 00422 00423 const size_type size()const { 00424 return m_v->size(); 00425 } 00426 00427 void set_vector(const bit_vector_type* v=NULL) { 00428 m_v = v; 00429 } 00430 00431 select_support_interleaved& operator=(const select_support_interleaved& rs) { 00432 if (this != &rs) { 00433 set_vector(rs.m_v); 00434 } 00435 return *this; 00436 } 00437 00438 void swap(select_support_interleaved& rs) { } 00439 00440 bool operator==(const select_support_interleaved& ss)const { 00441 if (this == &ss) 00442 return true; 00443 return ss.m_v == m_v; 00444 } 00445 00446 bool operator!=(const select_support_interleaved& rs)const { 00447 return !(*this == rs); 00448 } 00449 00450 00451 void load(std::istream& in, const bit_vector_type* v=NULL) { 00452 set_vector(v); 00453 } 00454 00455 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00456 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00457 size_type written_bytes = 0; 00458 structure_tree::add_size(child, written_bytes); 00459 return written_bytes; 00460 } 00461 }; 00462 00463 } 00464 00465 #endif