SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/vlc_vector.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2008 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #include "int_vector.hpp"
00022 #include "elias_delta_coder.hpp"
00023 #include "iterators.hpp"
00024 
00025 #ifndef SDSL_VLC_VECTOR
00026 #define SDSL_VLC_VECTOR
00027 
00029 namespace sdsl
00030 {
00031 
00032 template<uint8_t fixedIntWidth>
00033 struct vlc_vector_trait {
00034     typedef int_vector<0> int_vector_type;
00035 };
00036 
00037 template<>
00038 struct vlc_vector_trait<32> {
00039     typedef int_vector<32> int_vector_type;
00040 };
00041 
00043 
00047 template<class Coder=coder::elias_delta,
00048          uint32_t SampleDens = 16, uint8_t fixedIntWidth=0>
00049 class vlc_vector
00050 {
00051     public:
00052         typedef uint64_t                                                        value_type;     // STL Container requirement
00053         typedef random_access_const_iterator<vlc_vector> iterator;// STL Container requirement
00054         typedef iterator                                                        const_iterator; // STL Container requirement
00055         typedef const value_type                                        reference;
00056         typedef const value_type                                        const_reference;
00057         typedef const value_type*                                       const_pointer;
00058         typedef ptrdiff_t                                                       difference_type;// STL Container requirement
00059         typedef int_vector<>::size_type                         size_type;              // STL Container requirement
00060         typedef Coder                                                           coder;
00061         typedef typename vlc_vector_trait<fixedIntWidth>::int_vector_type int_vector_type;
00062         static  const uint32_t                                          sample_dens     = SampleDens;
00063 
00064         int_vector<0>   m_z;            // compressed bit stream
00065     private:
00066         int_vector_type   m_sample_pointer;
00067         size_type               m_elements;    // number of elements
00068         uint32_t                m_sample_dens;
00069 
00070         // workaround function for the constructor
00071         void construct() {
00072             m_elements = 0;
00073             m_sample_dens = 16;
00074         }
00075         void copy(const vlc_vector& v);
00076 
00077         void clear() {
00078             m_z.resize(0);
00079             m_elements = 0;
00080             m_sample_pointer.resize(0);
00081         }
00082 
00083     public:
00085         vlc_vector() {
00086             construct();
00087         };
00089 
00092         vlc_vector(const vlc_vector& v);
00093 
00095 
00098         template<class Container>
00099         vlc_vector(const Container& c) {
00100             construct();
00101             init(c);
00102         };
00103 
00105         /*
00106             \param v_buf A int_vector_file_buf.
00107                 \pre No two adjacent values should be equal.
00108         */
00109         template<uint8_t int_width, class size_type_class>
00110         vlc_vector(int_vector_file_buffer<int_width, size_type_class>& v_buf) {
00111             construct();
00112             init(v_buf);
00113         }
00114 
00115         template<class Container>
00116         void init(const Container& c);
00117 
00118         template<uint8_t int_width, class size_type_class>
00119         void init(int_vector_file_buffer<int_width, size_type_class>& v_buf);
00120 
00122         ~vlc_vector() {
00123         };
00124 
00126 
00130         size_type size()const;
00131 
00133 
00135         static size_type max_size();
00136 
00138 
00143         bool empty() const;
00144 
00146 
00154         void swap(vlc_vector& v);
00155 
00157 
00161         const const_iterator begin()const;
00162 
00164 
00168         const const_iterator end()const;
00169 
00170         // Iterator that points to the last element of the vlc_vector.
00171         /*
00172          *      Required for the Container Concept of the STL.
00173          *  \sa rend()
00174          */
00175 //              reverse_iterator rbegin()const;
00176 
00177         // Iterator that points to the position before the first element of the vlc_vector.
00178         /*
00179          *      Required for the Container Concept of the STL
00180          *  \sa rbegin()
00181          */
00182 //              reverse_iterator rend()const;
00183 
00185 
00189         value_type operator[](size_type i)const;
00190 
00192 
00195         vlc_vector& operator=(const vlc_vector& v);
00196 
00198 
00206         bool operator==(const vlc_vector& v)const;
00207 
00209 
00217         bool operator!=(const vlc_vector& v)const;
00218 
00220 
00223         size_type serialize(std::ostream& out) const;
00224 
00226         void load(std::istream& in);
00227 
00229 
00232         value_type sample(const size_type i) const;
00233 
00234         uint32_t get_sample_dens() const;
00235         void set_sample_dens(const uint32_t sample_dens);
00236 };
00237 
00238 
00239 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00240 inline uint32_t vlc_vector<Coder, SampleDens, fixedIntWidth>::get_sample_dens() const
00241 {
00242     if (SampleDens == 0)
00243         return m_sample_dens;
00244     else
00245         return SampleDens;
00246 }
00247 
00248 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00249 inline void vlc_vector<Coder, SampleDens, fixedIntWidth>::set_sample_dens(const uint32_t sample_dens)
00250 {
00251     m_sample_dens = sample_dens;
00252 }
00253 
00254 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00255 inline typename vlc_vector<Coder, SampleDens,fixedIntWidth>::value_type vlc_vector<Coder, SampleDens,fixedIntWidth>::operator[](const size_type i)const
00256 {
00257     assert(i+1 != 0);
00258 #ifdef SDSL_DEBUG
00259     if (i >= m_elements) {
00260         throw std::out_of_range("OUT_OF_RANGE_ERROR: vlc_vector::operator[](size_type); i >= size()!");
00261         return 0;
00262     }
00263 #endif
00264     size_type idx = i/get_sample_dens();
00265     return (Coder::template decode<false, false, int*>(m_z.data(), m_sample_pointer[idx], i-SampleDens*idx+1)) - 1;
00266 }
00267 
00268 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00269 inline vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::size()const
00270 {
00271     return m_elements;
00272 }
00273 
00274 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00275 inline vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::max_size()
00276 {
00277     return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes
00278 }
00279 
00280 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00281 inline bool vlc_vector<Coder, SampleDens,fixedIntWidth>::empty()const
00282 {
00283     return 0==m_elements;
00284 }
00285 
00286 
00287 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00288 void vlc_vector<Coder, SampleDens,fixedIntWidth>::copy(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)
00289 {
00290     m_z                                 = v.m_z;                                // copy compressed bit stream
00291     m_sample_pointer            = v.m_sample_pointer;      // copy sample values
00292     m_elements                  = v.m_elements;                 // copy number of stored elements
00293 }
00294 
00295 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00296 vlc_vector<Coder, SampleDens,fixedIntWidth>::vlc_vector(const vlc_vector& v)
00297 {
00298     copy(v);
00299 }
00300 
00301 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00302 vlc_vector<Coder, SampleDens,fixedIntWidth>& vlc_vector<Coder, SampleDens,fixedIntWidth>::operator=(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)
00303 {
00304     if (this != &v) {// if v and _this_ are not the same object
00305         copy(v);
00306     }
00307     return *this;
00308 }
00309 
00310 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00311 bool vlc_vector<Coder, SampleDens,fixedIntWidth>::operator==(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)const
00312 {
00313     if (this == &v)
00314         return true;
00315     return              m_elements == v.m_elements
00316                 and     m_z == v.m_z
00317                 and     m_sample_pointer == v.m_sample_pointer;
00318 }
00319 
00320 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00321 bool vlc_vector<Coder, SampleDens,fixedIntWidth>::operator!=(const vlc_vector<Coder, SampleDens,fixedIntWidth>& v)const
00322 {
00323     return !(*this == v);
00324 }
00325 
00326 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00327 void vlc_vector<Coder, SampleDens,fixedIntWidth>::swap(vlc_vector<Coder, SampleDens,fixedIntWidth>& v)
00328 {
00329     if (this != &v) { // if v and _this_ are not the same object
00330         m_z.swap(v.m_z);                                        // swap compressed bit streams
00331         m_sample_pointer.swap(v.m_sample_pointer);
00332         std::swap(m_elements, v.m_elements);// swap the number of elements
00333     }
00334 }
00335 
00336 
00337 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00338 template<class Container>
00339 void vlc_vector<Coder, SampleDens,fixedIntWidth>::init(const Container& c)
00340 {
00341     // clear bit_vectors
00342     clear();
00343 
00344     if (c.empty())  // if c is empty there is nothing to do...
00345         return;
00346     size_type samples = 0, z_size = 0;
00347 //  (1) Calculate size of z
00348     for (size_type i=0; i < c.size(); ++i) {
00349         if (c[i]+1<1) {
00350             throw std::logic_error("vlc_vector cannot decode values smaller than 1!");
00351         }
00352         z_size += Coder::encoding_length(c[i]+1);
00353     }
00354     samples = (c.size()+get_sample_dens()-1)/get_sample_dens();
00355 //      (2) Write z
00356 
00357     m_sample_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1);
00358     m_sample_pointer.resize(samples+1); // add 1 for last entry
00359 
00360     std::cout<<"z_size="<<z_size<<std::endl;
00361     std::cout<<"int_width = "<<bit_magic::l1BP(z_size+1)+1<<std::endl;
00362     std::cout<<samples<<std::endl;
00363 
00364     m_z.bit_resize(z_size);
00365     z_size = 0;
00366     uint64_t* z_data = Coder::raw_data(m_z);
00367     uint8_t offset = 0;
00368     size_type no_sample = 0;
00369     for (size_type i=0, sample_cnt=0; i < c.size(); ++i, --no_sample) {
00370         if (!no_sample) { // add a sample pointer
00371             no_sample = get_sample_dens();
00372             m_sample_pointer[sample_cnt++] = z_size;
00373         }
00374         Coder::encode(c[i]+1, z_data, offset);
00375         z_size += Coder::encoding_length(c[i]+1);
00376     }
00377     m_elements = c.size();
00378 }
00379 
00380 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00381 template<uint8_t int_width, class size_type_class>
00382 void vlc_vector<Coder, SampleDens,fixedIntWidth>::init(int_vector_file_buffer<int_width, size_type_class>& v_buf)
00383 {
00384     // clear bit_vectors
00385     clear();
00386     size_type n = v_buf.int_vector_size;
00387     if (n == 0)  // if c is empty there is nothing to do...
00388         return;
00389     v_buf.reset();
00390     size_type samples=0, z_size=0;
00391 //  (1) Calculate size of z
00392     for (size_type i=0, r_sum=0, r = v_buf.load_next_block(); r_sum < n;) {
00393         for (; i < r_sum+r; ++i) {
00394             size_type x = v_buf[i-r_sum]+1;
00395             if (x < 1) {
00396                 throw std::logic_error("vlc_vector cannot decode values smaller than 1!");
00397             }
00398             z_size += Coder::encoding_length(x);
00399         }
00400         r_sum += r; r = v_buf.load_next_block();
00401     }
00402     samples = (n+get_sample_dens()-1)/get_sample_dens();
00403 //      (2) Write z
00404 
00405     m_sample_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1);
00406     m_sample_pointer.resize(samples+1); // add 1 for last entry
00407 
00408 //     (b) Initilize bit_vector for encoded data
00409     m_z.bit_resize(z_size);
00410     z_size = 0;
00411     uint64_t* z_data = Coder::raw_data(m_z);
00412     uint8_t offset = 0;
00413 
00414 //     (c) Write sample values and deltas
00415     v_buf.reset();
00416     size_type no_sample = 0;
00417     for (size_type i=0, sample_cnt = 0, r_sum=0, r = v_buf.load_next_block(); r_sum < n;) {
00418         for (; i < r_sum+r; ++i, --no_sample) {
00419             if (!no_sample) { // add a sample pointer
00420                 no_sample = get_sample_dens();
00421                 m_sample_pointer[sample_cnt++] = z_size;
00422             }
00423             size_type x = v_buf[i-r_sum]+1;
00424             Coder::encode(x, z_data, offset);   // write encoded values
00425             z_size += Coder::encoding_length(x);
00426         }
00427         r_sum += r; r = v_buf.load_next_block();
00428     }
00429     m_elements = n;
00430 }
00431 
00432 
00433 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00434 vlc_vector<>::size_type vlc_vector<Coder, SampleDens,fixedIntWidth>::serialize(std::ostream& out) const
00435 {
00436     size_type written_bytes = 0;
00437     out.write((char*) &m_elements, sizeof(m_elements));
00438     written_bytes += sizeof(m_elements);
00439     written_bytes += m_z.serialize(out);
00440     written_bytes += m_sample_pointer.serialize(out);
00441     return written_bytes;
00442 }
00443 
00444 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00445 void vlc_vector<Coder, SampleDens,fixedIntWidth>::load(std::istream& in)
00446 {
00447     in.read((char*) &m_elements, sizeof(m_elements));
00448     m_z.load(in);
00449     m_sample_pointer.load(in);
00450 }
00451 
00452 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00453 const typename vlc_vector<Coder,SampleDens,fixedIntWidth>::const_iterator vlc_vector<Coder, SampleDens,fixedIntWidth>::begin()const
00454 {
00455     return const_iterator(this, 0);
00456 }
00457 
00458 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00459 const typename vlc_vector<Coder,SampleDens,fixedIntWidth>::const_iterator vlc_vector<Coder, SampleDens,fixedIntWidth>::end()const
00460 {
00461     return const_iterator(this, this->m_elements);
00462 }
00463 
00464 
00465 } // end namespace sdsl
00466 
00467 #endif