SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #include "int_vector.hpp" 00022 #include "elias_delta_coder.hpp" 00023 #include "iterators.hpp" 00024 00025 #ifndef SDSL_VLC_VECTOR 00026 #define SDSL_VLC_VECTOR 00027 00029 namespace sdsl 00030 { 00031 00032 template<uint8_t fixedIntWidth> 00033 struct vlc_vector_trait { 00034 typedef int_vector<0> int_vector_type; 00035 }; 00036 00037 template<> 00038 struct vlc_vector_trait<32> { 00039 typedef int_vector<32> int_vector_type; 00040 }; 00041 00043 00047 template<class Coder=coder::elias_delta, 00048 uint32_t SampleDens = 16, uint8_t fixedIntWidth=0> 00049 class vlc_vector 00050 { 00051 public: 00052 typedef uint64_t value_type; // STL Container requirement 00053 typedef random_access_const_iterator<vlc_vector> iterator;// STL Container requirement 00054 typedef iterator const_iterator; // STL Container requirement 00055 typedef const value_type reference; 00056 typedef const value_type const_reference; 00057 typedef const value_type* const_pointer; 00058 typedef ptrdiff_t difference_type;// STL Container requirement 00059 typedef int_vector<>::size_type size_type; // STL Container requirement 00060 typedef Coder coder; 00061 typedef typename vlc_vector_trait<fixedIntWidth>::int_vector_type int_vector_type; 00062 static const uint32_t sample_dens = SampleDens; 00063 00064 int_vector<0> m_z; // compressed bit stream 00065 private: 00066 int_vector_type m_sample_pointer; 00067 size_type m_elements; // number of elements 00068 uint32_t m_sample_dens; 00069 00070 // workaround function for the constructor 00071 void construct() { 00072 m_elements = 0; 00073 m_sample_dens = 16; 00074 } 00075 void copy(const vlc_vector& v); 00076 00077 void clear() { 00078 m_z.resize(0); 00079 m_elements = 0; 00080 m_sample_pointer.resize(0); 00081 } 00082 00083 public: 00085 vlc_vector() { 00086 construct(); 00087 }; 00089 00092 vlc_vector(const vlc_vector& v); 00093 00095 00098 template<class Container> 00099 vlc_vector(const Container& c) { 00100 construct(); 00101 init(c); 00102 }; 00103 00105 /* 00106 \param v_buf A int_vector_file_buf. 00107 \pre No two adjacent values should be equal. 00108 */ 00109 template<uint8_t int_width, class size_type_class> 00110 vlc_vector(int_vector_file_buffer<int_width, size_type_class>& v_buf) { 00111 construct(); 00112 init(v_buf); 00113 } 00114 00115 template<class Container> 00116 void init(const Container& c); 00117 00118 template<uint8_t int_width, class size_type_class> 00119 void init(int_vector_file_buffer<int_width, size_type_class>& v_buf); 00120 00122 ~vlc_vector() { 00123 }; 00124 00126 00130 size_type size()const; 00131 00133 00135 static size_type max_size(); 00136 00138 00143 bool empty() const; 00144 00146 00154 void swap(vlc_vector& v); 00155 00157 00161 const const_iterator begin()const; 00162 00164 00168 const const_iterator end()const; 00169 00170 // Iterator that points to the last element of the vlc_vector. 00171 /* 00172 * Required for the Container Concept of the STL. 00173 * \sa rend() 00174 */ 00175 // reverse_iterator rbegin()const; 00176 00177 // Iterator that points to the position before the first element of the vlc_vector. 00178 /* 00179 * Required for the Container Concept of the STL 00180 * \sa rbegin() 00181 */ 00182 // reverse_iterator rend()const; 00183 00185 00189 value_type operator[](size_type i)const; 00190 00192 00195 vlc_vector& operator=(const vlc_vector& v); 00196 00198 00206 bool operator==(const vlc_vector& v)const; 00207 00209 00217 bool operator!=(const vlc_vector& v)const; 00218 00220 00223 size_type serialize(std::ostream& out) const; 00224 00226 void load(std::istream& in); 00227 00229 00232 value_type sample(const size_type i) const; 00233 00234 uint32_t get_sample_dens() const; 00235 void set_sample_dens(const uint32_t sample_dens); 00236 }; 00237 00238 00239 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00240 inline uint32_t vlc_vector<Coder, SampleDens, fixedIntWidth>::get_sample_dens() const 00241 { 00242 if (SampleDens == 0) 00243 return m_sample_dens; 00244 else 00245 return SampleDens; 00246 } 00247 00248 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00249 inline void vlc_vector<Coder, SampleDens, fixedIntWidth>::set_sample_dens(const uint32_t sample_dens) 00250 { 00251 m_sample_dens = sample_dens; 00252 } 00253 00254 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00255 inline typename vlc_vector<Coder, SampleDens,fixedIntWidth>::value_type vlc_vector<Coder, SampleDens,fixedIntWidth>::operator[](const size_type i)const 00256 { 00257 assert(i+1 != 0); 00258 #ifdef SDSL_DEBUG 00259 if (i >= m_elements) { 00260 throw std::out_of_range("OUT_OF_RANGE_ERROR: vlc_vector::operator[](size_type); i >= size()!"); 00261 return 0; 00262 } 00263 #endif 00264 size_type idx = i/get_sample_dens(); 00265 return (Coder::template decode<false, false, int*>(m_z.data(), m_sample_pointer[idx], i-SampleDens*idx+1)) - 1; 00266 } 00267 00268 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00269 inline vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::size()const 00270 { 00271 return m_elements; 00272 } 00273 00274 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00275 inline vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::max_size() 00276 { 00277 return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes 00278 } 00279 00280 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00281 inline bool vlc_vector<Coder, SampleDens,fixedIntWidth>::empty()const 00282 { 00283 return 0==m_elements; 00284 } 00285 00286 00287 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00288 void vlc_vector<Coder, SampleDens,fixedIntWidth>::copy(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v) 00289 { 00290 m_z = v.m_z; // copy compressed bit stream 00291 m_sample_pointer = v.m_sample_pointer; // copy sample values 00292 m_elements = v.m_elements; // copy number of stored elements 00293 } 00294 00295 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00296 vlc_vector<Coder, SampleDens,fixedIntWidth>::vlc_vector(const vlc_vector& v) 00297 { 00298 copy(v); 00299 } 00300 00301 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00302 vlc_vector<Coder, SampleDens,fixedIntWidth>& vlc_vector<Coder, SampleDens,fixedIntWidth>::operator=(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v) 00303 { 00304 if (this != &v) {// if v and _this_ are not the same object 00305 copy(v); 00306 } 00307 return *this; 00308 } 00309 00310 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00311 bool vlc_vector<Coder, SampleDens,fixedIntWidth>::operator==(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)const 00312 { 00313 if (this == &v) 00314 return true; 00315 return m_elements == v.m_elements 00316 and m_z == v.m_z 00317 and m_sample_pointer == v.m_sample_pointer; 00318 } 00319 00320 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00321 bool vlc_vector<Coder, SampleDens,fixedIntWidth>::operator!=(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)const 00322 { 00323 return !(*this == v); 00324 } 00325 00326 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00327 void vlc_vector<Coder, SampleDens,fixedIntWidth>::swap(vlc_vector<Coder, SampleDens,fixedIntWidth>& v) 00328 { 00329 if (this != &v) { // if v and _this_ are not the same object 00330 m_z.swap(v.m_z); // swap compressed bit streams 00331 m_sample_pointer.swap(v.m_sample_pointer); 00332 std::swap(m_elements, v.m_elements);// swap the number of elements 00333 } 00334 } 00335 00336 00337 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00338 template<class Container> 00339 void vlc_vector<Coder, SampleDens,fixedIntWidth>::init(const Container& c) 00340 { 00341 // clear bit_vectors 00342 clear(); 00343 00344 if (c.empty()) // if c is empty there is nothing to do... 00345 return; 00346 size_type samples = 0, z_size = 0; 00347 // (1) Calculate size of z 00348 for (size_type i=0; i < c.size(); ++i) { 00349 if (c[i]+1<1) { 00350 throw std::logic_error("vlc_vector cannot decode values smaller than 1!"); 00351 } 00352 z_size += Coder::encoding_length(c[i]+1); 00353 } 00354 samples = (c.size()+get_sample_dens()-1)/get_sample_dens(); 00355 // (2) Write z 00356 00357 m_sample_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1); 00358 m_sample_pointer.resize(samples+1); // add 1 for last entry 00359 00360 std::cout<<"z_size="<<z_size<<std::endl; 00361 std::cout<<"int_width = "<<bit_magic::l1BP(z_size+1)+1<<std::endl; 00362 std::cout<<samples<<std::endl; 00363 00364 m_z.bit_resize(z_size); 00365 z_size = 0; 00366 uint64_t* z_data = Coder::raw_data(m_z); 00367 uint8_t offset = 0; 00368 size_type no_sample = 0; 00369 for (size_type i=0, sample_cnt=0; i < c.size(); ++i, --no_sample) { 00370 if (!no_sample) { // add a sample pointer 00371 no_sample = get_sample_dens(); 00372 m_sample_pointer[sample_cnt++] = z_size; 00373 } 00374 Coder::encode(c[i]+1, z_data, offset); 00375 z_size += Coder::encoding_length(c[i]+1); 00376 } 00377 m_elements = c.size(); 00378 } 00379 00380 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00381 template<uint8_t int_width, class size_type_class> 00382 void vlc_vector<Coder, SampleDens,fixedIntWidth>::init(int_vector_file_buffer<int_width, size_type_class>& v_buf) 00383 { 00384 // clear bit_vectors 00385 clear(); 00386 size_type n = v_buf.int_vector_size; 00387 if (n == 0) // if c is empty there is nothing to do... 00388 return; 00389 v_buf.reset(); 00390 size_type samples=0, z_size=0; 00391 // (1) Calculate size of z 00392 for (size_type i=0, r_sum=0, r = v_buf.load_next_block(); r_sum < n;) { 00393 for (; i < r_sum+r; ++i) { 00394 size_type x = v_buf[i-r_sum]+1; 00395 if (x < 1) { 00396 throw std::logic_error("vlc_vector cannot decode values smaller than 1!"); 00397 } 00398 z_size += Coder::encoding_length(x); 00399 } 00400 r_sum += r; r = v_buf.load_next_block(); 00401 } 00402 samples = (n+get_sample_dens()-1)/get_sample_dens(); 00403 // (2) Write z 00404 00405 m_sample_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1); 00406 m_sample_pointer.resize(samples+1); // add 1 for last entry 00407 00408 // (b) Initilize bit_vector for encoded data 00409 m_z.bit_resize(z_size); 00410 z_size = 0; 00411 uint64_t* z_data = Coder::raw_data(m_z); 00412 uint8_t offset = 0; 00413 00414 // (c) Write sample values and deltas 00415 v_buf.reset(); 00416 size_type no_sample = 0; 00417 for (size_type i=0, sample_cnt = 0, r_sum=0, r = v_buf.load_next_block(); r_sum < n;) { 00418 for (; i < r_sum+r; ++i, --no_sample) { 00419 if (!no_sample) { // add a sample pointer 00420 no_sample = get_sample_dens(); 00421 m_sample_pointer[sample_cnt++] = z_size; 00422 } 00423 size_type x = v_buf[i-r_sum]+1; 00424 Coder::encode(x, z_data, offset); // write encoded values 00425 z_size += Coder::encoding_length(x); 00426 } 00427 r_sum += r; r = v_buf.load_next_block(); 00428 } 00429 m_elements = n; 00430 } 00431 00432 00433 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00434 vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::serialize(std::ostream& out) const 00435 { 00436 size_type written_bytes = 0; 00437 out.write((char*) &m_elements, sizeof(m_elements)); 00438 written_bytes += sizeof(m_elements); 00439 written_bytes += m_z.serialize(out); 00440 written_bytes += m_sample_pointer.serialize(out); 00441 return written_bytes; 00442 } 00443 00444 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00445 void vlc_vector<Coder, SampleDens,fixedIntWidth>::load(std::istream& in) 00446 { 00447 in.read((char*) &m_elements, sizeof(m_elements)); 00448 m_z.load(in); 00449 m_sample_pointer.load(in); 00450 } 00451 00452 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00453 const typename vlc_vector<Coder,SampleDens,fixedIntWidth>::const_iterator vlc_vector<Coder, SampleDens,fixedIntWidth>::begin()const 00454 { 00455 return const_iterator(this, 0); 00456 } 00457 00458 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth> 00459 const typename vlc_vector<Coder,SampleDens,fixedIntWidth>::const_iterator vlc_vector<Coder, SampleDens,fixedIntWidth>::end()const 00460 { 00461 return const_iterator(this, this->m_elements); 00462 } 00463 00464 00465 } // end namespace sdsl 00466 00467 #endif