SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #include "int_vector.hpp" 00022 #include "elias_delta_coder.hpp" 00023 #include "rank_support.hpp" 00024 #include "select_support.hpp" 00025 00026 #ifndef SDSL_ENC_VECTOR_THEO 00027 #define SDSL_ENC_VECTOR_THEO 00028 00030 namespace sdsl 00031 { 00032 00033 template<class EncVector> 00034 class enc_vector_theo_const_iterator; // forward declaration 00035 00036 template<uint8_t> 00037 struct enc_vector_theo_trait { 00038 typedef int_vector<0> int_vector_type; 00039 }; 00040 00041 template<> 00042 struct enc_vector_theo_trait<32> { 00043 typedef int_vector<32> int_vector_type; 00044 }; 00045 00047 00051 template<class Coder=coder::elias_delta, 00052 uint32_t SampleDens = 1, 00053 class RankSupport=rank_support_v<>, 00054 class SelectSupport = select_support_mcl<1>, 00055 uint8_t fixedIntWidth = 0 > 00056 class enc_vector_theo 00057 { 00058 public: 00059 typedef uint64_t value_type; // STL Container requirement 00060 typedef enc_vector_theo_const_iterator<enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth> > iterator; // STL Container requirement 00061 typedef iterator const_iterator; // STL Container requirement 00062 typedef const value_type reference; 00063 typedef const value_type const_reference; 00064 typedef const value_type* const_pointer; 00065 typedef ptrdiff_t difference_type;// STL Container requirement 00066 typedef int_vector<>::size_type size_type; // STL Container requirement 00067 typedef Coder coder; 00068 typedef RankSupport rank_support; 00069 typedef SelectSupport select_support; 00070 static const uint32_t sample_dens = SampleDens; 00071 00072 friend class enc_vector_theo_const_iterator<enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth> >; // STL Container requirement 00073 private: 00074 int_vector<0> m_z; // compressed bit stream 00075 int_vector<1> m_sample; // indicator for a sample 00076 00077 typename enc_vector_theo_trait<fixedIntWidth>::int_vector_type m_sample_pointer; 00078 typename enc_vector_theo_trait<fixedIntWidth>::int_vector_type m_sample_vals; 00079 00080 RankSupport m_sample_rank; // rank support for m_sample 00081 size_type m_elements; // number of elements 00082 00083 // workaround function for the constructor 00084 void construct() { 00085 m_elements = 0; 00086 } 00087 void copy(const enc_vector_theo& v); 00088 public: 00090 enc_vector_theo() { 00091 construct(); 00092 }; 00094 00097 enc_vector_theo(const enc_vector_theo& v); 00098 00100 00104 template<class Container> 00105 enc_vector_theo(const Container& c) { 00106 construct(); 00107 init(c); 00108 }; 00109 00110 template<class Container> 00111 void init(const Container& c); 00112 00114 ~enc_vector_theo() { 00115 }; 00116 00118 00122 size_type size()const; 00123 00125 00127 static size_type max_size(); 00128 00130 00135 bool empty() const; 00136 00138 00146 void swap(enc_vector_theo& v); 00147 00149 00153 const const_iterator begin()const; 00154 00156 00160 const const_iterator end()const; 00161 00162 // Iterator that points to the last element of the enc_vector_theo. 00163 /* 00164 * Required for the Container Concept of the STL. 00165 * \sa rend() 00166 */ 00167 // reverse_iterator rbegin()const; 00168 00169 // Iterator that points to the position before the first element of the enc_vector_theo. 00170 /* 00171 * Required for the Container Concept of the STL 00172 * \sa rbegin() 00173 */ 00174 // reverse_iterator rend()const; 00175 00177 00181 value_type operator[](size_type i)const; 00182 00184 00187 enc_vector_theo& operator=(const enc_vector_theo& v); 00188 00190 00198 bool operator==(const enc_vector_theo& v)const; 00199 00201 00209 bool operator!=(const enc_vector_theo& v)const; 00210 00212 00215 size_type serialize(std::ostream& out) const; 00216 00218 void load(std::istream& in); 00219 00221 00224 value_type sample(const size_type i) const; 00225 }; 00226 00227 00228 // TODO: uint64_t als value type ersetzen durch EncVector::value_type geht nicht??? mit using.... 00229 template<class EncVector> 00230 struct enc_vector_theo_const_iterator: public std::iterator<std::random_access_iterator_tag, uint64_t, ptrdiff_t> { 00231 typedef const reference const_reference; 00232 typedef enc_vector_theo_const_iterator<EncVector> const_iterator; 00233 typedef typename EncVector::size_type size_type; 00234 typedef typename EncVector::value_type value_type; 00235 typedef typename EncVector::difference_type difference_type; 00236 00237 const EncVector* m_v; // enc_vector_theo the interator belongs to 00238 value_type m_decoded_val[((size_type(EncVector::sample_dens))<<6)/EncVector::coder::min_codeword_length+1];// buffer for decoded values 00239 static const size_type m_decoded_val_size = ((size_type(EncVector::sample_dens))<<6)/EncVector::coder::min_codeword_length;// buffer for decoded values 00240 size_type m_decoded_val_start_idx; // Index of the first element that is buffered 00241 size_type m_decoded_val_end_idx; // Index of the first element after the buffered elements 00242 size_type m_idx; // Index of the current element 00243 00245 00249 enc_vector_theo_const_iterator(const EncVector* v, size_type idx=0) { 00250 m_v = v; 00251 m_idx = idx; 00252 m_decoded_val_start_idx = 0; 00253 m_decoded_val_end_idx = 0; 00254 } 00255 00256 enc_vector_theo_const_iterator(const enc_vector_theo_const_iterator& it) { 00257 enc_vector_theo_const_iterator(it.m_v, it.m_idx); 00258 } 00259 00260 ~enc_vector_theo_const_iterator() { } 00261 00262 inline const_reference operator*(); 00263 00265 const_iterator& operator++() { 00266 ++m_idx; 00267 return *this; 00268 } 00269 00271 const_iterator operator++(int x) { 00272 enc_vector_theo_const_iterator it = *this; 00273 ++(*this); 00274 return it; 00275 } 00276 00278 const_iterator& operator--() { 00279 --m_idx; 00280 return *this; 00281 } 00282 00284 const_iterator operator--(int x) { 00285 enc_vector_theo_const_iterator it = *this; 00286 ++(*this); 00287 return it; 00288 } 00289 00290 const_iterator& operator+=(difference_type i) { 00291 if (i<0) 00292 return *this -= (-i); 00293 m_idx += i; 00294 return *this; 00295 } 00296 00297 const_iterator& operator-=(difference_type i) { 00298 if (i<0) 00299 return *this += (-i); 00300 m_idx -= i; 00301 return *this += -i; 00302 } 00303 00304 const_iterator operator+(difference_type i) const { 00305 const_iterator it = *this; 00306 return it += i; 00307 } 00308 00309 const_iterator operator-(difference_type i) const { 00310 const_iterator it = *this; 00311 return it -= i; 00312 } 00313 00314 const_reference operator[](difference_type i) const { 00315 return *(*this + i); 00316 } 00317 00318 bool operator==(const enc_vector_theo_const_iterator& it)const { 00319 // supported vectors and index have to be equal 00320 return m_idx == it.m_idx and m_v == it.m_v; 00321 } 00322 00323 bool operator!=(const enc_vector_theo_const_iterator& it)const { 00324 return m_idx != it.m_idx or m_v != it.m_v; 00325 } 00326 00327 bool operator<(const enc_vector_theo_const_iterator& it)const { 00328 return m_idx < it.m_idx; 00329 } 00330 00331 bool operator>(const enc_vector_theo_const_iterator& it)const { 00332 return m_idx > it.m_idx; 00333 } 00334 00335 bool operator>=(const enc_vector_theo_const_iterator& it)const { 00336 return !(*this < it); 00337 } 00338 00339 bool operator<=(const enc_vector_theo_const_iterator& it)const { 00340 return !(*this > it); 00341 } 00342 00343 }; 00344 00345 template<class Coder, 00346 uint32_t SampleDens, 00347 class RankSupport, 00348 class SelectSupport, 00349 uint8_t fixedIntWidth> 00350 inline typename enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::value_type enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::operator[](const size_type i)const 00351 { 00352 if (i+1 == 0 || i >= m_elements) { 00353 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_theo::operator[](size_type); idx >= size()!"); 00354 return 0; 00355 } 00356 const size_type sample_rank = m_sample_rank.rank(i+1); 00357 if (m_sample[i]) { // -> sample_idx == i 00358 return m_sample_vals[sample_rank-1]; 00359 } 00360 size_type sample_idx = bit_magic::prev(m_sample.data(), i); 00361 size_type idx_of_sample_in_z = m_sample_pointer[sample_rank-1]; 00362 return m_sample_vals[sample_rank-1] + Coder::decode_prefix_sum(m_z.data(), idx_of_sample_in_z, i-sample_idx); 00363 } 00364 00365 template<class Coder, 00366 uint32_t SampleDens, 00367 class RankSupport, 00368 class SelectSupport, 00369 uint8_t fixedIntWidth> 00370 inline typename enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::value_type enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::sample(const size_type i)const 00371 { 00372 if (i >= m_sample_vals.size()) { 00373 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_theo::sample(size_type); i*SampleDens >= size()!"); 00374 return 0; 00375 } 00376 return m_sample_vals[i]; 00377 } 00378 template<class Coder, 00379 uint32_t SampleDens, 00380 class RankSupport, 00381 class SelectSupport, 00382 uint8_t fixedIntWidth> 00383 inline enc_vector_theo<>::size_type enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::size()const 00384 { 00385 return m_elements; 00386 } 00387 00388 template<class Coder, 00389 uint32_t SampleDens, 00390 class RankSupport, 00391 class SelectSupport, 00392 uint8_t fixedIntWidth> 00393 inline enc_vector_theo<>::size_type enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::max_size() 00394 { 00395 return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes 00396 } 00397 00398 template<class Coder, 00399 uint32_t SampleDens, 00400 class RankSupport, 00401 class SelectSupport, 00402 uint8_t fixedIntWidth> 00403 inline bool enc_vector_theo<Coder, SampleDens, RankSupport, SelectSupport, fixedIntWidth>::empty()const 00404 { 00405 return 0==m_elements; 00406 } 00407 00408 00409 template<class Coder, 00410 uint32_t SampleDens, 00411 class RankSupport, 00412 class SelectSupport, 00413 uint8_t fixedIntWidth> 00414 void enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::copy(const enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& v) 00415 { 00416 m_z = v.m_z; // copy compressed bit stream 00417 m_sample = v.m_sample; // copy indicator for a sample 00418 m_sample_pointer = v.m_sample_pointer; // copy sample pointer vector 00419 m_sample_vals = v.m_sample_vals; // copy sample values 00420 m_sample_rank = v.m_sample_rank; // copy rank support for sample indicator 00421 m_sample_rank.set_vector(&m_sample); // set rank supported bit_vector 00422 m_elements = v.m_elements; // copy number of stored elements 00423 } 00424 00425 template<class Coder, 00426 uint32_t SampleDens, 00427 class RankSupport, 00428 class SelectSupport, 00429 uint8_t fixedIntWidth> 00430 enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::enc_vector_theo(const enc_vector_theo& v) 00431 { 00432 copy(v); 00433 } 00434 00435 template<class Coder, 00436 uint32_t SampleDens, 00437 class RankSupport, 00438 class SelectSupport, 00439 uint8_t fixedIntWidth> 00440 enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::operator=(const enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& v) 00441 { 00442 if (this != &v) {// if v and _this_ are not the same object 00443 copy(v); 00444 } 00445 return *this; 00446 } 00447 00448 template<class Coder, 00449 uint32_t SampleDens, 00450 class RankSupport, 00451 class SelectSupport, 00452 uint8_t fixedIntWidth> 00453 bool enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::operator==(const enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& v)const 00454 { 00455 if (this == &v) 00456 return true; 00457 return m_elements == v.m_elements 00458 and m_z == v.m_z 00459 and m_sample == v.m_sample 00460 and m_sample_pointer == v.m_sample_pointer 00461 and m_sample_vals == v.m_sample_vals 00462 and m_sample_rank == v.m_sample_rank; 00463 } 00464 00465 template<class Coder, 00466 uint32_t SampleDens, 00467 class RankSupport, 00468 class SelectSupport, 00469 uint8_t fixedIntWidth> 00470 bool enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::operator!=(const enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& v)const 00471 { 00472 return !(*this == v); 00473 } 00474 00475 template<class Coder, 00476 uint32_t SampleDens, 00477 class RankSupport, 00478 class SelectSupport, 00479 uint8_t fixedIntWidth> 00480 void enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::swap(enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>& v) 00481 { 00482 if (this != &v) { // if v and _this_ are not the same object 00483 m_z.swap(v.m_z); // swap compressed bit streams 00484 m_sample_pointer.swap(v.m_sample_pointer); // swap the sample pointer vector 00485 m_sample_vals.swap(v.m_sample_vals); 00486 m_sample_rank.swap(v.m_sample_rank); // swap rank support for sample indicator 00487 // m_sample_select.swap(v.m_sample_select); // swap select support for sample indicator 00488 m_sample.swap(v.m_sample); // swap the sample vector 00489 std::swap(m_elements, v.m_elements);// swap the number of elements 00490 } 00491 } 00492 00493 00494 template<class Coder, 00495 uint32_t SampleDens, 00496 class RankSupport, 00497 class SelectSupport, 00498 uint8_t fixedIntWidth> 00499 template<class Container> 00500 void enc_vector_theo<Coder, SampleDens, RankSupport , SelectSupport, fixedIntWidth>::init(const Container& c) 00501 { 00502 // clear BitVectors 00503 m_z.resize(0); 00504 m_elements = 0; 00505 // m_inc_start.resize(0); 00506 m_sample.resize(0); 00507 m_sample_pointer.resize(0); 00508 m_sample_vals.resize(0); 00509 if (c.empty()) // if c is empty there is nothing to do... 00510 return; 00511 // m_inc_start.resize( c.size() ); util::setZeroBits(m_inc_start); 00512 m_sample.resize(c.size()); util::set_zero_bits(m_sample); 00513 typename Container::const_iterator it = c.begin(), end = c.end(); 00514 // determine the greatest value in the difference encoding 00515 typename Container::value_type max_value = 1;//*(it++) + 1; 00516 // determine the greatest value of the samples 00517 typename Container::value_type max_sample_value = (*it++) + 1; 00518 typename Container::value_type v1 = max_sample_value, v2; 00519 size_type z_size = 0;//Coder::encoding_length(v1); 00520 size_type sample_cnt = 1, max_sample_pointer = 0; 00521 m_sample[0] = 1; 00522 const size_type threshold = (SampleDens<<6); 00523 //std::cerr<<"calculate max_sample_pointer"<<std::endl; 00524 for (size_type i=1, z_partial_size = 0, elen=0; it != end; ++it,++i) { 00525 if (((v2=(*it)+1) <= v1) or (elen = Coder::encoding_length(v2-v1))+z_partial_size > threshold) { // start of an increasing sequence or force encoding of sample 00526 // if( max_value < v2 ) max_value = v2; 00527 if (max_sample_value < v2) max_sample_value = v2; 00528 m_sample[i] = 1; 00529 z_size += z_partial_size; 00530 max_sample_pointer = z_size; 00531 ++sample_cnt; 00532 z_partial_size = 0; 00533 } else { // part of an increasing sequence 00534 z_partial_size += elen; 00535 if (max_value < v2-v1) max_value = v2-v1; 00536 } 00537 v1 = v2; 00538 } 00539 //std::cerr<<"Calculate delta"<<std::endl; 00540 { 00541 int_vector<0> delta_c(c.size()-sample_cnt, 0, bit_magic::l1BP(max_value)+1); // Vector for difference encoding of c 00542 m_sample_pointer.set_int_width(bit_magic::l1BP(max_sample_pointer)+1); 00543 m_sample_pointer.resize(sample_cnt); 00544 m_sample_vals.set_int_width(bit_magic::l1BP(max_sample_value)+1); 00545 m_sample_vals.resize(sample_cnt); 00546 int_vector<0>::iterator d_it = delta_c.begin(); 00547 typename enc_vector_theo_trait<fixedIntWidth>::int_vector_type::iterator sp_it = m_sample_pointer.begin(); 00548 typename enc_vector_theo_trait<fixedIntWidth>::int_vector_type::iterator sv_it = m_sample_vals.begin(); 00549 it = c.begin(); 00550 z_size = 0; 00551 for (int_vector<1>::const_iterator s_it = m_sample.begin(); it != c.end(); ++it, ++s_it) { 00552 v2 = *it+1; 00553 if (*s_it) {// if sample 00554 *(sv_it++) = v2-1; 00555 *(sp_it++) = z_size; 00556 // std::cerr<<"sample "<<v2<<std::endl; 00557 } else { 00558 *d_it = v2-v1; 00559 z_size += Coder::encoding_length(*d_it); 00560 ++d_it; 00561 } 00562 v1 = v2; 00563 } 00564 //std::cerr<<"Encode delta "<<delta_c.size()<<" integers"<<std::endl; 00565 Coder::encode(delta_c, m_z); // encode delta_c to m_z 00566 } 00567 // delta_c.resize(0); 00568 //std::cerr<<"Calc rank"<<std::endl; 00569 util::init_support(m_sample_rank, &m_sample); // init rank for m_sample 00570 //std::cerr<<"Calc select"<<std::endl; 00571 //std::cerr<<"Finished "<<std::endl;, 00572 m_elements = c.size(); 00573 } 00574 00575 template<class Coder, 00576 uint32_t SampleDens, 00577 class RankSupport, 00578 class SelectSupport, 00579 uint8_t fixedIntWidth> 00580 enc_vector_theo<>::size_type enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::serialize(std::ostream& out) const 00581 { 00582 size_type written_bytes = 0; 00583 out.write((char*) &m_elements, sizeof(m_elements)); 00584 written_bytes += sizeof(m_elements); 00585 written_bytes += m_z.serialize(out); 00586 written_bytes += m_sample.serialize(out); 00587 written_bytes += m_sample_pointer.serialize(out); 00588 written_bytes += m_sample_vals.serialize(out); 00589 written_bytes += m_sample_rank.serialize(out); 00590 return written_bytes; 00591 } 00592 00593 template<class Coder, 00594 uint32_t SampleDens, 00595 class RankSupport, 00596 class SelectSupport, 00597 uint8_t fixedIntWidth> 00598 void enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::load(std::istream& in) 00599 { 00600 in.read((char*) &m_elements, sizeof(m_elements)); 00601 m_z.load(in); 00602 m_sample.load(in); 00603 m_sample_pointer.load(in); 00604 m_sample_vals.load(in); 00605 m_sample_rank.load(in, &m_sample); 00606 } 00607 00608 template<class Coder, 00609 uint32_t SampleDens, 00610 class RankSupport, 00611 class SelectSupport, 00612 uint8_t fixedIntWidth> 00613 const typename enc_vector_theo<Coder,SampleDens, RankSupport, SelectSupport, fixedIntWidth>::const_iterator enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::begin()const 00614 { 00615 return const_iterator(this, 0); 00616 } 00617 00618 template<class Coder, 00619 uint32_t SampleDens, 00620 class RankSupport, 00621 class SelectSupport, 00622 uint8_t fixedIntWidth> 00623 const typename enc_vector_theo<Coder,SampleDens,RankSupport, SelectSupport, fixedIntWidth>::const_iterator enc_vector_theo<Coder, SampleDens,RankSupport, SelectSupport, fixedIntWidth>::end()const 00624 { 00625 return const_iterator(this, this->m_elements); 00626 } 00627 00628 template<class EncVector> 00629 typename enc_vector_theo_const_iterator<EncVector>::const_reference enc_vector_theo_const_iterator<EncVector>::operator*() 00630 { 00631 // if requested element is buffered 00632 if (m_idx >= m_decoded_val_start_idx and m_idx < m_decoded_val_end_idx) { 00633 return m_decoded_val[m_idx-m_decoded_val_start_idx]; 00634 } else { 00635 if (m_idx >= m_v->m_elements) { 00636 throw std::out_of_range("enc_vector_theo_const_iterator: dereferencing failed!"); 00637 } 00638 size_type sample_rank = m_v->m_sample_rank.rank(m_idx+1);// get number of samples before m_idx 00639 size_type sample_idx = bit_magic::prev(m_v->m_sample.data(), m_idx); 00640 00641 size_type idx_of_sample_in_z = m_v->m_sample_pointer[sample_rank-1];// get idx in z 00642 size_type n = 0;//m_decoded_val_size; 00643 if (sample_rank == m_v->m_sample_vals.size()) {// last one 00644 n = m_v->m_elements - sample_idx; 00645 } else { 00646 n = bit_magic::next(m_v->m_sample.data(), m_idx) - sample_idx; 00647 } 00648 if (n>1) { 00649 EncVector::coder::template decode<false, true, uint64_t* >(m_v->m_z.data(), idx_of_sample_in_z, n-1, m_decoded_val+1); 00650 } 00651 m_decoded_val_start_idx = sample_idx; 00652 m_decoded_val_end_idx = m_decoded_val_start_idx + n; 00653 value_type* vals = m_decoded_val; 00654 *(vals++) = m_v->m_sample_vals[sample_rank-1]; 00655 for (size_type i=1; i<n; ++i, ++vals) { 00656 *vals += *(vals-1); 00657 } 00658 return m_decoded_val[m_idx-m_decoded_val_start_idx]; 00659 } 00660 } 00661 00662 } // end namespace sdsl 00663 00664 #endif