SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/enc_vector_prac2.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 20011 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #include "int_vector.hpp"
00022 #include "elias_delta_coder.hpp"
00023 #include "iterators.hpp"
00024 
00025 #ifndef SDSL_ENC_VECTOR_PRAC2
00026 #define SDSL_ENC_VECTOR_PRAC2
00027 
00029 namespace sdsl
00030 {
00031 
00032 template<uint8_t fixedIntWidth>
00033 struct enc_vector_prac2_trait {
00034     typedef int_vector<0> int_vector_type;
00035 };
00036 
00037 template<>
00038 struct enc_vector_prac2_trait<32> {
00039     typedef int_vector<32> int_vector_type;
00040 };
00041 
00043 
00049 template<class Coder=coder::elias_delta,
00050          uint32_t SampleDens = 8, uint8_t fixedIntWidth=0>
00051 class enc_vector_prac2
00052 {
00053     public:
00054         typedef uint64_t                                                        value_type;     // STL Container requirement
00055         typedef random_access_const_iterator<enc_vector_prac2> iterator;// STL Container requirement
00056         typedef iterator                                                        const_iterator; // STL Container requirement
00057         typedef const value_type                                        reference;
00058         typedef const value_type                                        const_reference;
00059         typedef const value_type*                                       const_pointer;
00060         typedef ptrdiff_t                                                       difference_type;// STL Container requirement
00061         typedef int_vector<>::size_type                         size_type;              // STL Container requirement
00062         typedef Coder                                                           coder;
00063         typedef typename enc_vector_prac2_trait<fixedIntWidth>::int_vector_type int_vector_type;
00064         static  const uint32_t                                          sample_dens     = SampleDens;
00065 
00066         int_vector<0>   m_z;            // compressed bit stream
00067     private:
00068         int_vector_type   m_sample_vals_and_pointer;
00069         size_type               m_elements;    // number of elements
00070         uint32_t                m_sample_dens;
00071 
00072         // workaround function for the constructor
00073         void construct() {
00074             m_elements = 0;
00075             m_sample_dens = 8;
00076         }
00077         void copy(const enc_vector_prac2& v);
00078 
00079         void clear() {
00080             m_z.resize(0);
00081             m_elements = 0;
00082             m_sample_vals_and_pointer.resize(0);
00083         }
00084 
00085     public:
00087         enc_vector_prac2() {
00088             construct();
00089         };
00091 
00094         enc_vector_prac2(const enc_vector_prac2& v);
00095 
00097 
00100         template<class Container>
00101         enc_vector_prac2(const Container& c) {
00102             construct();
00103             init(c);
00104         };
00105 
00107         /*
00108             \param v_buf A int_vector_file_buf.
00109                 \pre No two adjacent values should be equal.
00110         */
00111         template<uint8_t int_width, class size_type_class>
00112         enc_vector_prac2(int_vector_file_buffer<int_width, size_type_class>& v_buf) {
00113             construct();
00114             init(v_buf);
00115         }
00116 
00117         template<class Container>
00118         void init(const Container& c);
00119 
00120         template<uint8_t int_width, class size_type_class>
00121         void init(int_vector_file_buffer<int_width, size_type_class>& v_buf);
00122 
00124         ~enc_vector_prac2() {
00125         };
00126 
00128 
00132         size_type size()const;
00133 
00135 
00137         static size_type max_size();
00138 
00140 
00145         bool empty() const;
00146 
00148 
00156         void swap(enc_vector_prac2& v);
00157 
00159 
00163         const const_iterator begin()const;
00164 
00166 
00170         const const_iterator end()const;
00171 
00172         // Iterator that points to the last element of the enc_vector_prac2.
00173         /*
00174          *      Required for the Container Concept of the STL.
00175          *  \sa rend()
00176          */
00177 //              reverse_iterator rbegin()const;
00178 
00179         // Iterator that points to the position before the first element of the enc_vector_prac2.
00180         /*
00181          *      Required for the Container Concept of the STL
00182          *  \sa rbegin()
00183          */
00184 //              reverse_iterator rend()const;
00185 
00187 
00191         value_type operator[](size_type i)const;
00192 
00194 
00197         enc_vector_prac2& operator=(const enc_vector_prac2& v);
00198 
00200 
00208         bool operator==(const enc_vector_prac2& v)const;
00209 
00211 
00219         bool operator!=(const enc_vector_prac2& v)const;
00220 
00222 
00225         size_type serialize(std::ostream& out) const;
00226 
00228         void load(std::istream& in);
00229 
00231 
00234         value_type sample(const size_type i) const;
00235 
00236         uint32_t get_sample_dens() const;
00237         void set_sample_dens(const uint32_t sample_dens);
00238 
00243 //              template<class Iterator>
00244         void get_inter_sampled_values(const size_type i, uint64_t*& it)const {
00245             *(it++) = 0;
00246             if (i*SampleDens + SampleDens - 1 < size()) {
00247                 Coder::template decode<true, true>(m_z.data(), m_sample_vals_and_pointer[(i<<1)+1], SampleDens - 1, it);
00248             } else {
00249                 assert(i*SampleDens < size());
00250                 Coder::template decode<true, true>(m_z.data(), m_sample_vals_and_pointer[(i<<1)+1], size()-i*SampleDens - 1, it);
00251             }
00252         };
00253 };
00254 
00255 
00256 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00257 inline uint32_t enc_vector_prac2<Coder, SampleDens, fixedIntWidth>::get_sample_dens() const
00258 {
00259     if (SampleDens == 0)
00260         return m_sample_dens;
00261     else
00262         return SampleDens;
00263 }
00264 
00265 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00266 inline void enc_vector_prac2<Coder, SampleDens, fixedIntWidth>::set_sample_dens(const uint32_t sample_dens)
00267 {
00268     m_sample_dens = sample_dens;
00269 }
00270 
00271 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00272 inline typename enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::value_type enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::operator[](const size_type i)const
00273 {
00274     assert(i+1 != 0);
00275 #ifdef SDSL_DEBUG
00276     if (i >= m_elements) {
00277         throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_prac2::operator[](size_type); i >= size()!");
00278         return 0;
00279     }
00280 #endif
00281     size_type idx = i/get_sample_dens();
00282     return m_sample_vals_and_pointer[idx<<1] + Coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i-SampleDens*idx);
00283 }
00284 
00285 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00286 inline typename enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::value_type enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::sample(const size_type i)const
00287 {
00288     assert(i*get_sample_dens()+1 != 0);
00289 #ifdef SDSL_DEBUG
00290     if (i*get_sample_dens() >= m_elements) {
00291         throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_prac2::sample(size_type); i*get_sample_dens() >= size()!");
00292         return 0;
00293     }
00294 #endif
00295     return m_sample_vals_and_pointer[i<<1];
00296 }
00297 
00298 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00299 inline enc_vector_prac2<>::size_type enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::size()const
00300 {
00301     return m_elements;
00302 }
00303 
00304 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00305 inline enc_vector_prac2<>::size_type enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::max_size()
00306 {
00307     return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes
00308 }
00309 
00310 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00311 inline bool enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::empty()const
00312 {
00313     return 0==m_elements;
00314 }
00315 
00316 
00317 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00318 void enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::copy(const enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& v)
00319 {
00320     m_z                                 = v.m_z;                                // copy compressed bit stream
00321     m_sample_vals_and_pointer           = v.m_sample_vals_and_pointer;      // copy sample values
00322     m_elements                  = v.m_elements;                 // copy number of stored elements
00323 }
00324 
00325 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00326 enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::enc_vector_prac2(const enc_vector_prac2& v)
00327 {
00328     copy(v);
00329 }
00330 
00331 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00332 enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::operator=(const enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& v)
00333 {
00334     if (this != &v) {// if v and _this_ are not the same object
00335         copy(v);
00336     }
00337     return *this;
00338 }
00339 
00340 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00341 bool enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::operator==(const enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& v)const
00342 {
00343     if (this == &v)
00344         return true;
00345     return              m_elements == v.m_elements
00346                 and     m_z == v.m_z
00347                 and     m_sample_vals_and_pointer == v.m_sample_vals_and_pointer;
00348 }
00349 
00350 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00351 bool enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::operator!=(const enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& v)const
00352 {
00353     return !(*this == v);
00354 }
00355 
00356 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00357 void enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::swap(enc_vector_prac2<Coder, SampleDens,fixedIntWidth>& v)
00358 {
00359     if (this != &v) { // if v and _this_ are not the same object
00360         m_z.swap(v.m_z);                                        // swap compressed bit streams
00361         m_sample_vals_and_pointer.swap(v.m_sample_vals_and_pointer);
00362         std::swap(m_elements, v.m_elements);// swap the number of elements
00363     }
00364 }
00365 
00366 
00367 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00368 template<class Container>
00369 void enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::init(const Container& c)
00370 {
00371     // clear bit_vectors
00372     clear();
00373 
00374     if (c.empty())  // if c is empty there is nothing to do...
00375         return;
00376     typename Container::const_iterator  it                      = c.begin(), end = c.end();
00377     typename Container::value_type              v1                      = *it, v2, max_value=0, max_sample_value=0, x;
00378     size_type samples=0;
00379     size_type z_size = 0;
00380 //  (1) Calculate maximal value of samples and of deltas
00381     for (size_type i=0, no_sample=0; it != end; ++it,++i, --no_sample) {
00382         v2 = *it;
00383         if (!no_sample) { // add a sample
00384             no_sample = get_sample_dens();
00385             if (max_sample_value < v2) max_sample_value = v2;
00386             ++samples;
00387         } else {
00388             if (max_value < v2-v1) max_value = v2 - v1;
00389             z_size += Coder::encoding_length(v2-v1);
00390         }
00391         v1=v2;
00392     }
00393 //      (2) Write sample values and deltas
00394     {
00395         if (max_sample_value > z_size+1)
00396             m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(max_sample_value) + 1);
00397         else
00398             m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1);
00399         m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry
00400         typename int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin();
00401         z_size = 0;
00402         size_type no_sample=0;
00403         for (it = c.begin(); it != end; ++it, --no_sample) {
00404             v2 = *it;
00405             if (!no_sample) { // add a sample
00406                 no_sample = get_sample_dens();
00407                 *sv_it = v2; ++sv_it;
00408                 *sv_it = z_size; ++sv_it;
00409             } else {
00410                 x = v2-v1;
00411                 if (v2 == v1) {
00412                     throw std::logic_error("enc_vector_prac2 cannot decode adjacent equal values!");
00413                 }
00414                 z_size += Coder::encoding_length(x);
00415             }
00416             v1=v2;
00417         }
00418         *sv_it = 0; ++sv_it;        // initialize
00419         *sv_it = z_size+1; ++sv_it; // last entry
00420 
00421         m_z.bit_resize(z_size);
00422         uint64_t* z_data = Coder::raw_data(m_z);
00423         uint8_t offset = 0;
00424         no_sample = 0;
00425         for (it = c.begin(); it != end; ++it, --no_sample) {
00426             v2 = *it;
00427             if (!no_sample) { // add a sample
00428                 no_sample = get_sample_dens();
00429             } else {
00430                 Coder::encode(v2-v1, z_data, offset);
00431             }
00432             v1=v2;
00433         }
00434 //              Coder::encode(delta_c, m_z); // encode delta_c to m_z
00435     }
00436 //      delta_c.resize(0);
00437 //std::cerr<<"Finished "<<std::endl;,
00438     m_elements = c.size();
00439 }
00440 
00441 
00442 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00443 template<uint8_t int_width, class size_type_class>
00444 void enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::init(int_vector_file_buffer<int_width, size_type_class>& v_buf)
00445 {
00446     // clear bit_vectors
00447     clear();
00448     size_type n = v_buf.int_vector_size;
00449     if (n == 0)  // if c is empty there is nothing to do...
00450         return;
00451     v_buf.reset();
00452     value_type  v1=0, v2=0, max_sample_value=0;
00453     size_type samples=0, z_size=0;
00454     const size_type sd = get_sample_dens();
00455     size_type z_size2 = 0, big_val_cnt=0, old_z_size=0, old_i=0;
00456     size_type big_val[sd+1], big_val_pos[sd+1];
00457     uint8_t log_sd=bit_magic::l1BP(sd)+1;
00458     size_type saved_bits=0;
00459 //  (1) Calculate maximal value of samples and of deltas
00460     for (size_type i=0, r_sum=0, r = v_buf.load_next_block(), no_sample = 0; r_sum < n;) {
00461         for (; i < r_sum+r; ++i, --no_sample) {
00462             v2 = v_buf[i-r_sum];
00463             if (!no_sample) { // is sample
00464                 no_sample = sd;
00465                 if (max_sample_value < v2) max_sample_value = v2;
00466                 ++samples;
00467 
00468                 size_type bits=0;
00469                 bits += log_sd; // bits for big_val_cnt
00470                 for (size_type j=0; j<big_val_cnt; ++j) {
00471                     bits += Coder::encoding_length(big_val[j]);
00472                     if (j == 0)
00473                         bits += Coder::encoding_length(big_val_pos[j]+1);
00474                     else
00475                         bits += Coder::encoding_length(big_val_pos[j]-big_val_pos[j-1]);
00476                 }
00477                 big_val_cnt = 0;
00478                 if (z_size - old_z_size > bits) {
00479                     z_size2 += bits;
00480                 } else {
00481                     z_size2 += (z_size-old_z_size);
00482                 }
00483                 old_z_size = z_size;
00484                 old_i = i;
00485             } else {
00486                 if (v2 == v1) {
00487                     throw std::logic_error("enc_vector_prac2 cannot decode adjacent equal values!");
00488                 }
00489                 z_size += Coder::encoding_length(v2-v1);
00490                 if (v2-v1 > 1) {
00491                     big_val[big_val_cnt] = v2-v1;
00492                     big_val_pos[big_val_cnt] = i-old_i;
00493                     ++big_val_cnt;
00494                 }
00495             }
00496             v1 = v2;
00497         }
00498         r_sum += r; r = v_buf.load_next_block();
00499     }
00500     std::cerr<<"z in MB = " << z_size/(8.0*1024.0*1024.0) << std::endl;
00501     std::cerr<<"z2 in MB = " << z_size2/(8.0*1024.0*1024.0) << std::endl;
00502     std::cerr<<"saving in MB = " << (z_size-z_size2)/(8.0*1024.0*1024.0) << std::endl;
00503 
00504 //      (2) Write sample values and deltas
00505 //     (a) Initialize array for sample values and pointers
00506     if (max_sample_value > z_size+1)
00507         m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(max_sample_value) + 1);
00508     else
00509         m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1);
00510     m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry
00511 
00512 //     (b) Initilize bit_vector for encoded data
00513     m_z.bit_resize(z_size);
00514     uint64_t* z_data = Coder::raw_data(m_z);
00515     uint8_t offset = 0;
00516 
00517 //     (c) Write sample values and deltas
00518     v_buf.reset();
00519     z_size = 0;
00520     for (size_type i=0, j=0, r_sum=0, r = v_buf.load_next_block(), no_sample = 0; r_sum < n;) {
00521         for (; i < r_sum+r; ++i, --no_sample) {
00522             v2 = v_buf[i-r_sum];
00523             if (!no_sample) { // is sample
00524                 no_sample = sd;
00525                 m_sample_vals_and_pointer[j++] = v2;    // write samples
00526                 m_sample_vals_and_pointer[j++] = z_size;// write pointers
00527             } else {
00528                 z_size += Coder::encoding_length(v2-v1);
00529                 Coder::encode(v2-v1, z_data, offset);   // write encoded values
00530             }
00531             v1 = v2;
00532         }
00533         r_sum += r; r = v_buf.load_next_block();
00534     }
00535     m_elements = n;
00536 }
00537 
00538 
00539 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00540 enc_vector_prac2<>::size_type enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::serialize(std::ostream& out) const
00541 {
00542     size_type written_bytes = 0;
00543     out.write((char*) &m_elements, sizeof(m_elements));
00544     written_bytes += sizeof(m_elements);
00545     written_bytes += m_z.serialize(out);
00546     written_bytes += m_sample_vals_and_pointer.serialize(out);
00547     return written_bytes;
00548 }
00549 
00550 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00551 void enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::load(std::istream& in)
00552 {
00553     in.read((char*) &m_elements, sizeof(m_elements));
00554     m_z.load(in);
00555     m_sample_vals_and_pointer.load(in);
00556 }
00557 
00558 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00559 const typename enc_vector_prac2<Coder,SampleDens,fixedIntWidth>::const_iterator enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::begin()const
00560 {
00561     return const_iterator(this, 0);
00562 }
00563 
00564 template<class Coder, uint32_t SampleDens, uint8_t fixedIntWidth>
00565 const typename enc_vector_prac2<Coder,SampleDens,fixedIntWidth>::const_iterator enc_vector_prac2<Coder, SampleDens,fixedIntWidth>::end()const
00566 {
00567     return const_iterator(this, this->m_elements);
00568 }
00569 
00570 
00571 } // end namespace sdsl
00572 
00573 #endif