SDSL: Succinct Data Structure Library: sdsl/include/sdsl/int

00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2008 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_INT_VECTOR
00022 #define INCLUDED_SDSL_INT_VECTOR
00023 
00024 #include "compatibility.hpp"
00025 #include "bitmagic.hpp"
00026 #include "util.hpp"
00027 #include "testutils.hpp"
00028 #include "uintx_t.hpp"
00029 #include "structure_tree.hpp"
00030 #include <iosfwd>    // forward declaration of ostream
00031 #include <stdexcept> // for exceptions
00032 #include <iostream>  // for cerr
00033 #include <typeinfo>
00034 #include <cassert>
00035 #include <iterator>
00036 #include <cstdlib>
00037 #include <cstddef>
00038 #include <ctime>    // for rand initialization
00039 #include <cstring>  // for memcpy
00040 #include <ostream>
00041 #include <istream>
00042 #include <string>
00043 
00044 
00045 
00047 namespace sdsl
00048 {
00049 
00050 typedef uint64_t std_size_type_for_int_vector;
00051 
00052 template<uint8_t fixedIntWidth=0, class size_type_class = std_size_type_for_int_vector>
00053 class int_vector; // forward declaration
00054 
00055 
00056 namespace algorithm
00057 {
00058 template<uint8_t fixedIntWidth>
00059 static void calculate_sa(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa);
00060 }
00061 
00062 
00064 typedef int_vector<1> bit_vector;
00065 
00066 template<class int_vector>
00067 class int_vector_reference;
00068 
00069 template<class int_vector>
00070 class int_vector_iterator_base;  // forward declaration
00071 
00072 template<class int_vector>
00073 class int_vector_iterator; // forward declaration
00074 
00075 template<class int_vector>
00076 class int_vector_const_iterator;  // forward declaration
00077 
00078 template<uint8_t b, uint8_t patter_len>  // forward declaration
00079 class rank_support_v;
00080 
00081 class rank_support_jmc;
00082 template<uint8_t bit_pattern, uint8_t pattern_len>
00083 class rank_support_v;
00084 class rank_support;
00085 
00086 class select_support;
00087 
00088 template<class RankSupport>
00089 class select_support_bs;
00090 
00091 template<uint8_t bit_pattern, uint8_t pattern_len> // forward declaration
00092 class select_support_mcl;
00093 
00094 namespace coder
00095 {
00096 class fibonacci;
00097 class elias_delta;
00098 class ternary;
00099 }
00100 
00101 template<uint8_t=0, class size_type_class = std_size_type_for_int_vector>
00102 class int_vector_file_buffer;
00103 
00104 template<class size_type_class = std_size_type_for_int_vector>
00105 class char_array_serialize_wrapper;
00106 
00107 
00108 template<uint8_t fixedIntWidth, class size_type_class>
00109 struct int_vector_trait {
00110     typedef uint64_t                            value_type;
00111     typedef size_type_class                     size_type;
00112     typedef int_vector<fixedIntWidth, size_type_class> int_vector_type;
00113     typedef int_vector_reference<int_vector_type>       reference;
00114     typedef const uint64_t const_reference;
00115     typedef uint8_t int_width_type;
00116     typedef int_vector_iterator<int_vector_type>                iterator;
00117     typedef int_vector_const_iterator<int_vector_type>   const_iterator;
00118     // Sets int_width to new_int_width
00119     static void set_int_width(int_width_type& int_width, const uint8_t new_int_width) {
00120         if (fixedIntWidth==1)
00121             return;
00122         if (new_int_width>0 and new_int_width<=64)
00123             int_width = new_int_width;
00124         else
00125             int_width = 64;
00126     }
00127 
00128     // read the size and int_width of a bit_vector
00129     static void read_header(size_type& size, int_width_type& int_width, std::istream& in) {
00130         util::read_member(size, in);
00131         if (0 == fixedIntWidth) {
00132             util::read_member(int_width, in);
00133             set_int_width(int_width, int_width);
00134         }
00135     }
00136 
00137     static iterator begin(int_vector_type* v, uint64_t*) {
00138         return iterator(v, 0);
00139     }
00140     static iterator end(int_vector_type* v, uint64_t*, size_type) {
00141         return iterator(v, v->size()*v->get_int_width()) ;
00142     }
00143     static const_iterator begin(const int_vector_type* v, const uint64_t*) {
00144         return const_iterator(v, 0);
00145     }
00146     static const_iterator end(const int_vector_type* v, const uint64_t*, size_type) {
00147         return const_iterator(v, v->size()*v->get_int_width());
00148     }
00149 };
00150 
00151 template<class size_type_class>
00152 struct int_vector_trait<64, size_type_class> {
00153     typedef uint64_t                            value_type;
00154     typedef size_type_class                     size_type;
00155     typedef int_vector<64, size_type_class> int_vector_type;
00156     typedef uint64_t& reference;
00157     typedef const uint64_t const_reference;
00158     typedef const uint8_t int_width_type;
00159     typedef uint64_t* iterator;
00160     typedef const uint64_t* const_iterator;
00161 
00162 
00163     static void set_int_width(int_width_type&, const uint8_t) {}
00164 
00165     // read the size and int_width of a bit_vector
00166     static void read_header(size_type& size, int_width_type&, std::istream& in) {
00167         util::read_member(size, in);
00168     }
00169 
00170     static iterator begin(int_vector_type*, uint64_t* begin) {
00171         return begin;
00172     }
00173     static iterator end(int_vector_type*, uint64_t* begin, size_type size) {
00174         return begin+size;
00175     }
00176     static const_iterator begin(const int_vector_type*, const uint64_t* begin) {
00177         return begin;
00178     }
00179     static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) {
00180         return begin+size;
00181     }
00182 };
00183 
00184 template<class size_type_class>
00185 struct int_vector_trait<32, size_type_class> {
00186     typedef uint32_t                            value_type;
00187     typedef size_type_class                     size_type;
00188     typedef int_vector<32, size_type_class> int_vector_type;
00189     typedef uint32_t& reference;
00190     typedef const uint32_t const_reference;
00191     typedef const uint8_t int_width_type;
00192     typedef uint32_t* iterator;
00193     typedef const uint32_t* const_iterator;
00194     static void set_int_width(int_width_type&, const uint8_t) {}
00195 
00196     // read the size and int_width of a bit_vector
00197     static void read_header(size_type& size, int_width_type&, std::istream& in) {
00198         util::read_member(size, in);
00199     }
00200 
00201     static iterator begin(int_vector_type*, uint64_t* begin) {
00202         return (uint32_t*)begin;
00203     }
00204     static iterator end(int_vector_type*, uint64_t* begin, size_type size) {
00205         return ((uint32_t*)begin)+size;
00206     }
00207     static const_iterator begin(const int_vector_type*, const uint64_t* begin) {
00208         return (uint32_t*)begin;
00209     }
00210     static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) {
00211         return ((uint32_t*)begin)+size;
00212     }
00213 };
00214 
00215 template<class size_type_class>
00216 struct int_vector_trait<16, size_type_class> {
00217     typedef uint16_t                            value_type;
00218     typedef size_type_class                     size_type;
00219     typedef int_vector<16, size_type_class> int_vector_type;
00220     typedef uint16_t& reference;
00221     typedef const uint16_t const_reference;
00222     typedef const uint8_t int_width_type;
00223     typedef uint16_t* iterator;
00224     typedef const uint16_t* const_iterator;
00225     static void set_int_width(int_width_type&, const uint8_t) {}
00226 
00227     // read the size and int_width of a bit_vector
00228     static void read_header(size_type& size, int_width_type&, std::istream& in) {
00229         util::read_member(size, in);
00230     }
00231 
00232     static iterator begin(int_vector_type*, uint64_t* begin) {
00233         return (uint16_t*)begin;
00234     }
00235     static iterator end(int_vector_type*, uint64_t* begin, size_type size) {
00236         return ((uint16_t*)begin)+size;
00237     }
00238     static const_iterator begin(const int_vector_type*, const uint64_t* begin) {
00239         return (uint16_t*)begin;
00240     }
00241     static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) {
00242         return ((uint16_t*)begin)+size;
00243     }
00244 };
00245 
00246 template<class size_type_class>
00247 struct int_vector_trait<8, size_type_class> {
00248     typedef uint8_t                                     value_type;
00249     typedef size_type_class                     size_type;
00250     typedef int_vector<8, size_type_class> int_vector_type;
00251     typedef uint8_t& reference;
00252     typedef const uint8_t const_reference;
00253     typedef const uint8_t int_width_type;
00254     typedef uint8_t* iterator;
00255     typedef const uint8_t* const_iterator;
00256     static void set_int_width(int_width_type&, const uint8_t) {}
00257 
00258     // read the size and int_width of a bit_vector
00259     static void read_header(size_type& size, int_width_type&, std::istream& in) {
00260         util::read_member(size, in);
00261     }
00262 
00263     static iterator begin(int_vector_type*, uint64_t* begin) {
00264         return (uint8_t*)begin;
00265     }
00266     static iterator end(int_vector_type*, uint64_t* begin, size_type size) {
00267         return ((uint8_t*)begin)+size;
00268     }
00269     static const_iterator begin(const int_vector_type*, const uint64_t* begin) {
00270         return (uint8_t*)begin;
00271     }
00272     static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) {
00273         return ((uint8_t*)begin)+size;
00274     }
00275 };
00276 
00278 
00303 template<uint8_t fixedIntWidth, class size_type_class>
00304 class int_vector
00305 {
00306     public:
00307 
00308         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::value_type            value_type;     // STL Container requirement
00309         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::iterator                      iterator;       // STL Container requirement
00310         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::const_iterator        const_iterator;
00311         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::reference             reference;
00312         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::const_reference       const_reference;
00313         typedef int_vector_reference<int_vector>*                                                       pointer; // TODO: fix this for 63,32,16,8 bit vector
00314         typedef const value_type*                                                                                       const_pointer;
00315         typedef ptrdiff_t                                       difference_type;// STL Container requirement
00316         typedef size_type_class                                                                                         size_type;              // STL Container requirement
00317         typedef typename int_vector_trait<fixedIntWidth,size_type_class>::int_width_type        int_width_type;
00318         typedef rank_support_v<1,1> rank_1_type;
00319         typedef rank_support_v<0,1> rank_0_type;
00320         typedef select_support_mcl<1,1> select_1_type;
00321         typedef select_support_mcl<0,1> select_0_type;
00322 
00323         friend struct int_vector_trait<fixedIntWidth, size_type_class>;
00324         friend class int_vector_iterator_base<int_vector>;
00325         friend class int_vector_iterator<int_vector>;
00326         friend class int_vector_const_iterator<int_vector>;
00327         friend class  coder::elias_delta;
00328         friend class  coder::fibonacci;
00329         friend class  coder::ternary;
00330         friend class  int_vector_file_buffer<fixedIntWidth, size_type_class>;
00332         friend std::istream& operator>>(std::istream&, int_vector<1>&);
00333         friend void util::set_random_bits<int_vector>(int_vector& v, int);
00334         friend void util::set_zero_bits<int_vector>(int_vector& v);
00335         friend void util::set_one_bits<int_vector>(int_vector& v);
00336         friend void util::bit_compress<int_vector>(int_vector& v);
00337         friend void util::set_all_values_to_k<int_vector>(int_vector& v, uint64_t k);
00338         friend void algorithm::calculate_sa<fixedIntWidth>(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa);
00339     private:
00340         size_type       m_size; 
00341         uint64_t*   m_data; 
00342         int_width_type m_int_width;
00343     public:
00344 
00346 
00351         int_vector(size_type elements = 0, value_type default_value = 0, uint8_t intWidth = fixedIntWidth);
00352 
00354 
00357         int_vector(const int_vector& v);
00358 
00360         ~int_vector();
00361 
00363 
00366         bool empty() const;
00367 
00369         /*  The swap method can be defined in terms of assignment.
00370                 This requires three assignments, each of which, for a container type, is linear
00371                 in the container's size. In a sense, then, a.swap(b) is redundant.
00372                 This implementation guaranties a run-time complexity that is constant rather than linear.
00373                 \params v int_vector to swap.
00374                 Required for the Assignable Conecpt of the STL.
00375           */
00376         void swap(int_vector& v);
00377 
00379 
00383         void resize(const size_type size);
00384 
00386 
00388         void bit_resize(const size_type size);
00389 
00391 
00395         size_type size() const;
00396 
00398 
00402         static size_type max_size();
00403 
00405 
00408         size_type bit_size() const;
00409 
00411 
00415         size_type capacity() const;
00416 
00418 
00421         const uint64_t* data() const {
00422             return m_data;
00423         }
00424 
00426 
00431         const value_type get_int(size_type idx, const uint8_t len=64) const;
00432 
00434 
00441         void set_int(size_type idx, value_type x, const uint8_t len=64);
00442 
00444 
00447         const uint8_t get_int_width() const;
00448 
00450 
00454         void set_int_width(uint8_t intWidth);
00455 
00457 
00461         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name = "", bool write_fixed_as_variable=false) const;
00462 
00464         void load(std::istream& in);
00465 
00467 
00472         inline reference operator[](const size_type& i);
00473 
00475 
00480         inline const_reference operator[](const size_type& i) const;
00481 
00482         // ! This operator should be used for displaying a specific range of the int_vector .
00483         /* ! \param range This const char array should consists of an integer <b>begin_idx</b> followed by two dots (..) followed by an integer <b>end_idx</b>. E.g. "0..10" or "3..7".
00484                 \return A string containing the bit representation from index <b>begin_idx</b> to (inclusive!) <b>end_idx</b>.
00485          */
00486 //              std::string operator[](const char* range) const;
00487 
00489 
00494         int_vector& operator=(const int_vector& v);
00495 
00496 
00498 
00506         bool operator==(const int_vector<fixedIntWidth>& v) const;
00507 
00509 
00519         bool operator!=(const int_vector& v) const;
00520 
00522 
00529         bool operator<(const int_vector& v) const; //TODO: Test
00530 
00532 
00539         bool operator>(const int_vector& v) const;//TODO: Test
00541 
00545         bool operator<=(const int_vector& v) const;//TODO: Test
00547 
00551         bool operator>=(const int_vector& v) const;//TODO: Test
00552 
00554 
00557         const iterator begin();
00558 
00560 
00563         const iterator end();
00564 
00566 
00568         const const_iterator begin() const;
00569 
00571 
00573         const const_iterator end() const;
00574 
00575         //TODO: rbegin()
00576         //TODO: rend()
00577 
00578     private:
00580         /* \param i Position of the bit to set to value b.
00581            \param b Value to set the bit at position i.
00582            \sa getBit
00583          */
00584         void setBit(size_type i,const bool b=true);
00585 
00587         /* \param i Position of the bit.
00588            \returns The boolean value of the bit at position i.
00589            \sa setBit
00590          */
00591         const bool getBit(size_type i) const;
00592 
00593 
00594 };
00606 
00607 template<class int_vector>
00608 class int_vector_reference
00609 {
00610     private:
00611         typename int_vector::value_type* const m_word;
00612         const uint8_t m_offset;
00613         const uint8_t m_len; 
00614     public:
00616 
00620         int_vector_reference(typename int_vector::value_type* word, uint8_t offset, uint8_t len):
00621             m_word(word),m_offset(offset),m_len(len) {};
00622 
00624 
00631         int_vector_reference& operator=(typename int_vector::value_type x) {
00632             bit_magic::write_int(m_word, x, m_offset, m_len);
00633             return *this;
00634         };
00635 
00636         int_vector_reference& operator=(const int_vector_reference& x) {
00637             return *this = typename int_vector::value_type(x);
00638         };
00639 
00641         operator typename int_vector::value_type()const {
00642             return bit_magic::read_int(m_word, m_offset, m_len);
00643         }
00644 
00645         bool operator==(const int_vector_reference& x)const {
00646             return typename int_vector::value_type(*this) == typename int_vector::value_type(x);
00647         }
00648 
00649         bool operator<(const int_vector_reference& x)const {
00650             return typename int_vector::value_type(*this) < typename int_vector::value_type(x);
00651         }
00652 };
00653 
00654 // specialization for int_vector_reference for int_vector == bit_vector
00655 // special thanks to Timo Beller, who pointed out that the specialization is missing
00656 template<>
00657 class int_vector_reference<bit_vector>
00658 {
00659     private:
00660         uint64_t* const m_word;
00661         uint64_t m_mask; // TODO make it const
00662     public:
00664 
00667         int_vector_reference(uint64_t* word, uint8_t offset, uint8_t):
00668             m_word(word),m_mask(1ULL<<offset) {};
00669 
00671 
00678         int_vector_reference& operator=(bool x) {
00679             if (x)
00680                 *m_word |= m_mask;
00681             else
00682                 *m_word &= ~m_mask;
00683             return *this;
00684         };
00685 
00686         int_vector_reference& operator=(const int_vector_reference& x) {
00687             return *this = bool(x);
00688         };
00689 
00691         operator bool()const {
00692             return !!(*m_word & m_mask);
00693         }
00694 
00695         bool operator==(const int_vector_reference& x)const {
00696             return bool(*this) == bool(x);
00697         }
00698 
00699         bool operator<(const int_vector_reference& x)const {
00700             return !bool(*this) && bool(x);
00701         }
00702 };
00703 
00704 
00705 
00706 template<class int_vector>
00707 class int_vector_iterator_base: public std::iterator<std::random_access_iterator_tag, typename int_vector::value_type, typename int_vector::difference_type/*ptrdiff_t*/>
00708 {
00709     public:
00710         typedef /*typename int_vector::size_type*/uint64_t                              size_type;
00711     protected:
00712         uint8_t m_offset;
00713         uint8_t m_len;
00714 
00715     public:
00716 
00717         int_vector_iterator_base(uint8_t offset, uint8_t len):m_offset(offset),m_len(len)
00718         {}
00719 
00720         int_vector_iterator_base(const int_vector* v=NULL, size_type idx=0) { /*:m_offset(idx&0x3F), m_len(v->m_int_width)*/
00721             m_offset = idx&0x3F;
00722             if (v==NULL)
00723                 m_len = 0;
00724             else
00725                 m_len = v->m_int_width;
00726         }
00727 };
00728 
00729 template<class int_vector>
00730 class int_vector_iterator : public int_vector_iterator_base<int_vector>
00731 {
00732     public:
00733         typedef int_vector_reference<int_vector>        reference;
00734         typedef int_vector_iterator                             iterator;
00735         typedef reference*                                                      pointer;
00736         typedef typename int_vector::size_type          size_type;
00737         typedef typename int_vector::difference_type difference_type;
00738 
00739     private:
00740 
00741         using int_vector_iterator_base<int_vector>::m_offset; // make m_offset easy usable
00742         using int_vector_iterator_base<int_vector>::m_len;    // make m_len easy usable
00743 
00744         typename int_vector::value_type* m_word;
00745 
00746     public:
00747 
00748         inline int_vector_iterator(int_vector* v=NULL, size_type idx=0) : int_vector_iterator_base<int_vector>(v, idx) {
00749             if (v!=NULL)
00750                 m_word = v->m_data + (idx>>6);
00751             else
00752                 m_word = NULL;
00753         }
00754 
00755 
00756         inline int_vector_iterator(const int_vector_iterator<int_vector>& it) : int_vector_iterator_base<int_vector>(it.m_offset, it.m_len) {
00757             m_word = it.m_word;
00758         }
00759 
00760         reference operator*() const {
00761             return reference(m_word, m_offset, m_len);
00762         }
00763 
00765         iterator& operator++() {
00766             m_offset+=m_len;
00767             if (m_offset >= 64) {
00768                 m_offset &= 0x3F;
00769                 ++m_word;
00770             }
00771             return *this;
00772         }
00773 
00775         iterator operator++(int x) {
00776             int_vector_iterator it = *this;
00777             ++(*this);
00778             return it;
00779         }
00780 
00782         iterator& operator--() {
00783             m_offset-=m_len;
00784             if (m_offset >= 64) {
00785                 m_offset &= 0x3F;
00786                 --m_word;
00787             }
00788             return *this;
00789         }
00790 
00792         iterator operator--(int x) {
00793             int_vector_iterator it = *this;
00794             --(*this);
00795             return it;
00796         }
00797 
00798         iterator& operator+=(difference_type i) {
00799             if (i<0)
00800                 return *this -= (-i);
00801             difference_type t = i*m_len;
00802             m_word += (t>>6);
00803             if ((m_offset+=(t&0x3F))&~0x3F) {  // if new offset is >= 64
00804                 ++m_word;       // add one to the word
00805                 m_offset&=0x3F; // offset = offset mod 64
00806             }
00807             return *this;
00808         }
00809 
00810         iterator& operator-=(difference_type i) {
00811             if (i<0)
00812                 return *this += (-i);
00813             difference_type t = i*m_len;
00814             m_word -= (t>>6);
00815             if ((m_offset-=(t&0x3F))&~0x3F) {  // if new offset is < 0
00816                 --m_word;
00817                 m_offset&=0x3F;
00818             }
00819             return *this;
00820         }
00821 
00822         iterator& operator=(const int_vector_iterator<int_vector>& it) {
00823             if (this != &it) {
00824                 m_word          = it.m_word;
00825                 m_offset        = it.m_offset;
00826                 m_len           = it.m_len;
00827             }
00828             return *this;
00829         }
00830 
00831         iterator operator+(difference_type i) const {
00832             iterator it = *this;
00833             return it += i;
00834         }
00835 
00836         iterator operator-(difference_type i) const {
00837             iterator it = *this;
00838             return it -= i;
00839         }
00840 
00841         reference operator[](difference_type i) const {
00842             return *(*this + i);
00843         }
00844 
00845         bool operator==(const int_vector_iterator& it)const {
00846             return it.m_word == m_word && it.m_offset == m_offset;
00847         }
00848 
00849         bool operator!=(const int_vector_iterator& it)const {
00850             return !(*this==it);
00851         }
00852 
00853         bool operator<(const int_vector_iterator& it)const {
00854             if (m_word == it.m_word)
00855                 return m_offset < it.m_offset;
00856             return m_word < it.m_word;
00857         }
00858 
00859         bool operator>(const int_vector_iterator& it)const {
00860             if (m_word == it.m_word)
00861                 return m_offset > it.m_offset;
00862             return m_word > it.m_word;
00863         }
00864 
00865         bool operator>=(const int_vector_iterator& it)const {
00866             return !(*this < it);
00867         }
00868 
00869         bool operator<=(const int_vector_iterator& it)const {
00870             return !(*this > it);
00871         }
00872         inline difference_type operator-(const int_vector_iterator& it) {
00873             return (((m_word - it.m_word)<<6) + m_offset - it.m_offset) / m_len;
00874         }
00875 };
00876 
00877 template<class int_vector>
00878 inline int_vector_iterator<int_vector> operator+(typename int_vector_iterator<int_vector>::difference_type n, const int_vector_iterator<int_vector>& it)
00879 {
00880     return it+n;
00881 }
00882 
00883 template<class int_vector>
00884 class int_vector_const_iterator : public int_vector_iterator_base<int_vector>
00885 {
00886     public:
00887         typedef typename int_vector::value_type         const_reference;
00888         typedef const typename int_vector::value_type*  pointer;
00889         typedef int_vector_const_iterator               const_iterator;
00890         typedef typename int_vector::size_type          size_type;
00891         typedef typename int_vector::difference_type difference_type;
00892 
00893     private:
00894 
00895         using int_vector_iterator_base<int_vector>::m_offset; // make m_offset easy usable
00896         using int_vector_iterator_base<int_vector>::m_len;    // make m_len easy usable
00897 
00898         const typename int_vector::value_type* m_word;
00899 
00900     public:
00901 
00902         int_vector_const_iterator(const int_vector* v=NULL, size_type idx=0) : int_vector_iterator_base<int_vector>(v, idx) {
00903             if (v!=NULL)
00904                 m_word = v->m_data + (idx>>6);
00905             else
00906                 m_word = NULL;
00907         }
00908 
00909         int_vector_const_iterator(const int_vector_iterator<int_vector>& it):int_vector_iterator_base<int_vector>(it.m_offset, it.m_len),m_word(it.m_word)
00910         { }
00911         /*
00912                 int_vector_const_iterator(const int_vector_const_iterator<int_vector> &it):int_vector_iterator_base<int_vector>(it.m_offset, it.m_len),m_word(it.m_word)
00913                 {
00914                 }
00915         */
00916 
00917         const_reference operator*() const {
00918             if (m_offset+m_len <= 64) {
00919                 return ((*m_word)>>m_offset)&bit_magic::Li1Mask[m_len];
00920             } else {
00921                 return ((*m_word)>>m_offset) | ((*(m_word+1) & bit_magic::Li1Mask[(m_offset+m_len)&0x3F])<<(64-m_offset));
00922             }
00923         }
00924 
00926         const_iterator& operator++() {
00927             m_offset+=m_len;
00928             if (m_offset >= 64) {
00929                 m_offset &= 0x3F;
00930                 ++m_word;
00931             }
00932             return *this;
00933         }
00934 
00936         const_iterator operator++(int x) {
00937             int_vector_const_iterator it = *this;
00938             ++(*this);
00939             return it;
00940         }
00941 
00943         const_iterator& operator--() {
00944             m_offset-=m_len;
00945             if (m_offset >= 64) {
00946                 m_offset &= 0x3F;
00947                 --m_word;
00948             }
00949             return *this;
00950         }
00951 
00953         const_iterator operator--(int x) {
00954             int_vector_const_iterator it = *this;
00955             --(*this);
00956             return it;
00957         }
00958 
00959         const_iterator& operator+=(difference_type i) {
00960             if (i<0)
00961                 return *this -= (-i);
00962             difference_type t = i*m_len;
00963             m_word += (t>>6);
00964             if ((m_offset+=(t&0x3F))&~0x3F) {// if new offset >= 64
00965                 ++m_word;       // add one to the word
00966                 m_offset&=0x3F; // offset = offset mod 64
00967             }
00968             return *this;
00969         }
00970 
00971         const_iterator& operator-=(difference_type i) {
00972             if (i<0)
00973                 return *this += (-i);
00974             difference_type t = i*m_len;
00975             m_word -= (t>>6);
00976             if ((m_offset-=(t&0x3F))&~0x3F) {// if new offset is < 0
00977                 --m_word;
00978                 m_offset&=0x3F;
00979             }
00980             return *this;
00981         }
00982 
00983         const_iterator operator+(difference_type i) const {
00984             const_iterator it = *this;
00985             return it += i;
00986         }
00987 
00988         const_iterator operator-(difference_type i) const {
00989             const_iterator it = *this;
00990             return it -= i;
00991         }
00992 
00993         const_reference operator[](difference_type i) const {
00994             return *(*this + i);
00995         }
00996 
00997         bool operator==(const int_vector_const_iterator& it)const {
00998             return it.m_word == m_word && it.m_offset == m_offset;
00999         }
01000 
01001         bool operator!=(const int_vector_const_iterator& it)const {
01002             return !(*this==it);
01003         }
01004 
01005         bool operator<(const int_vector_const_iterator& it)const {
01006             if (m_word == it.m_word)
01007                 return m_offset < it.m_offset;
01008             return m_word < it.m_word;
01009         }
01010 
01011         bool operator>(const int_vector_const_iterator& it)const {
01012             if (m_word == it.m_word)
01013                 return m_offset > it.m_offset;
01014             return m_word > it.m_word;
01015         }
01016 
01017         bool operator>=(const int_vector_const_iterator& it)const {
01018             return !(*this < it);
01019         }
01020 
01021         bool operator<=(const int_vector_const_iterator& it)const {
01022             return !(*this > it);
01023         }
01024 
01025 };
01026 
01027 template<class int_vector>
01028 inline typename int_vector_const_iterator<int_vector>::difference_type operator-(const int_vector_const_iterator<int_vector>& x, const int_vector_const_iterator<int_vector>& y)
01029 {
01030     return (((x.m_word - y.m_word)<<6) + x.m_offset - y.m_offset) / x.m_len;
01031 }
01032 
01033 template<class int_vector>
01034 inline int_vector_const_iterator<int_vector> operator+(typename int_vector_const_iterator<int_vector>::difference_type n, const int_vector_const_iterator<int_vector>& it)
01035 {
01036     return it + n;
01037 }
01038 
01039 //std::ostream& operator<<(std::ostream&, const int_vector<1> &);
01040 
01041 inline std::ostream& operator<<(std::ostream& os, const int_vector<1>& v)
01042 {
01043     for (int_vector<1>::const_iterator it=v.begin(), end = v.end(); it != end; ++it) {
01044         os << *it;
01045     }
01046     os << std::endl;
01047     return os;
01048 }
01049 
01050 inline std::ostream& operator<<(std::ostream& os, const int_vector<0>& v)
01051 {
01052     for (int_vector<0>::const_iterator it=v.begin(), end = v.end(); it != end; ++it) {
01053         os << *it;
01054         if (it+1 != end) os << " ";
01055     }
01056     os << std::endl;
01057     return os;
01058 }
01059 
01060 
01061 /*
01062 inline std::istream& operator>>(std::istream &in, int_vector<1> &v){
01063         char c;
01064         v.resize(0); // clear
01065         do{
01066                 c = in.get();
01067                 if(c=='0' || c=='1'){
01068                         v.bit_resize(v.bit_size()+1);
01069                         v.setBit(v.bit_size()-1,c=='1');
01070                 }
01071                 else{
01072                         break;
01073                 }
01074         }while(true);
01075         return in;
01076 }
01077 */
01078 
01079 // ==== int_vector implemenation  ====
01080 
01081 template<uint8_t fixedIntWidth, class size_type_class>
01082 inline int_vector<fixedIntWidth,size_type_class>::int_vector(size_type elements, value_type default_value, uint8_t intWidth):m_size(0), m_data(NULL), m_int_width(intWidth)
01083 {
01084     int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, intWidth);
01085     resize(elements);
01086     if (default_value == 0) {
01087         util::set_zero_bits(*this);
01088     } else if (default_value == 1 and m_int_width == 1) {
01089         util::set_one_bits(*this);
01090     } else {
01091         util::set_all_values_to_k(*this, default_value); // new initialization
01092     }
01093 }
01094 
01095 template<uint8_t fixedIntWidth, class size_type_class>
01096 inline int_vector<fixedIntWidth,size_type_class>::int_vector(const int_vector& v):m_size(0), m_data(NULL), m_int_width(v.m_int_width)
01097 {
01098     bit_resize(v.bit_size());
01099     if (v.capacity() > 0) {
01100         if (memcpy(m_data, v.data() ,v.capacity()/8)==NULL) {
01101             throw std::bad_alloc();
01102         }
01103     }
01104     int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, v.m_int_width);
01105 //      set_int_width(v.get_int_width());
01106 }
01107 
01108 template<uint8_t fixedIntWidth, class size_type_class>
01109 int_vector<fixedIntWidth,size_type_class>& int_vector<fixedIntWidth,size_type_class>::operator=(const int_vector& v)
01110 {
01111     if (this != &v) {// if v is not the same object
01112         bit_resize(v.bit_size());
01113         if (v.bit_size()>0) {
01114             if (memcpy(m_data, v.data() ,v.capacity()/8)==NULL) {
01115                 throw std::bad_alloc();
01116             }
01117         }
01118         set_int_width(v.get_int_width());
01119     }
01120     return *this;
01121 }
01122 
01123 // Destructor
01124 template<uint8_t fixedIntWidth, class size_type_class>
01125 int_vector<fixedIntWidth,size_type_class>::~int_vector()
01126 {
01127     if (m_data != NULL) {
01128         free(m_data); //fixed delete
01129     }
01130 }
01131 
01132 template<uint8_t fixedIntWidth, class size_type_class>
01133 void int_vector<fixedIntWidth,size_type_class>::swap(int_vector& v)
01134 {
01135     if (this != &v) { // if v and _this_ are not the same object
01136         size_type       size            = m_size;
01137         uint64_t*        data           = m_data;
01138         uint8_t         intWidth        = m_int_width;
01139         m_size          = v.m_size;
01140         m_data          = v.m_data;
01141         int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, v.m_int_width);
01142         v.m_size        = size;
01143         v.m_data        = data;
01144         int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(v.m_int_width, intWidth);
01145     }
01146 }
01147 
01148 template<uint8_t fixedIntWidth, class size_type_class>
01149 void int_vector<fixedIntWidth,size_type_class>::resize(const size_type size)
01150 {
01151     bit_resize(size * get_int_width());
01152 }
01153 
01154 
01155 template<uint8_t fixedIntWidth, class size_type_class>
01156 void int_vector<fixedIntWidth,size_type_class>::bit_resize(const size_type size)
01157 {
01158     bool do_realloc = ((size+63)>>6) != ((m_size+63)>>6);
01159     const size_type old_size = m_size;
01160     m_size = size;                       // set new size
01161     // special case: bitvector of size 0
01162     if (do_realloc or m_data==NULL) { // or (fixedIntWidth==1 and m_size==0) ) {
01163         uint64_t* data = NULL;
01164         // Note that we allocate 8 additional bytes if m_size % 64 == 0.
01165         // We need this padding since rank data structures do a memory
01166         // access to this padding to answer rank(size()) if size()%64 ==0.
01167         // Note that this padding is not counted in the serialize method!
01168         data = (uint64_t*)realloc(m_data, (((m_size+64)>>6)<<3)); // if m_data == NULL realloc
01169         // Method realloc is equivalent to malloc if m_data == NULL.
01170         // If size is zero and ptr is not NULL, a new, minimum sized object is allocated and the original object is freed.
01171         // The allocated memory is aligned such that it can be used for any data type, including AltiVec- and SSE-related types.
01172         m_data = data;
01173         // initialize unreachable bits to 0
01174         if (m_size > old_size and bit_size() < capacity()) {//m_size>0
01175             bit_magic::write_int(m_data+(bit_size()>>6), 0, bit_size()&0x3F, capacity()-bit_size());
01176         }
01177         if ((m_size % 64) == 0) {  // initialize unreachable bits with 0
01178             m_data[m_size/64] = 0;
01179         }
01180     }
01181 }
01182 
01183 template<uint8_t fixedIntWidth, class size_type_class>
01184 inline void int_vector<fixedIntWidth,size_type_class>::setBit(size_type idx,const bool value)
01185 {
01186 #ifdef SDSL_DEBUG
01187     if (idx >= m_size) {
01188         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::setBit(size_type idx, const bool value); idx >= size()!");
01189     }
01190 #endif
01191     size_type int64_idx = idx >> 6;  // idx/64
01192     idx &= 0x3F; // idx inside a uint64_t
01193     if (value) {
01194         m_data[int64_idx] |= 1ULL << idx;
01195     } else {
01196         m_data[int64_idx] &= (bit_magic::All1Mask ^ (1ULL << idx));
01197     }
01198 }
01199 
01200 template<uint8_t fixedIntWidth, class size_type_class>
01201 inline const bool int_vector<fixedIntWidth,size_type_class>::getBit(size_type idx)const
01202 {
01203 #ifdef SDSL_DEBUG
01204     if (idx >= m_size) {
01205         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::getBit(size_type idx); idx >= size()!");
01206     }
01207 #endif
01208     return m_data[idx >> 6] & (1ULL << (idx & 0x3F));
01209 }
01210 
01211 template<uint8_t fixedIntWidth, class size_type_class>
01212 inline const typename int_vector<fixedIntWidth,size_type_class>::value_type int_vector<fixedIntWidth,size_type_class>::get_int(size_type idx, const uint8_t len)const
01213 {
01214 #ifdef SDSL_DEBUG
01215     if (idx+len > m_size) {
01216         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); idx+len > size()!");
01217     }
01218     if (len > 64) {
01219         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); len>64!");
01220     }
01221 #endif
01222     return bit_magic::read_int(m_data+(idx>>6), idx&0x3F, len);
01223 }
01224 
01225 template<uint8_t fixedIntWidth, class size_type_class>
01226 inline void int_vector<fixedIntWidth,size_type_class>::set_int(size_type idx, value_type x, const uint8_t len)
01227 {
01228 #ifdef SDSL_DEBUG
01229     if (idx+len > m_size) {
01230         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); idx+len > size()!");
01231     }
01232     if (len > 64) {
01233         throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); len>64!");
01234     }
01235 #endif
01236     bit_magic::write_int(m_data+(idx>>6), x, idx&0x3F, len);
01237 }
01238 
01239 template<uint8_t fixedIntWidth, class size_type_class>
01240 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::size() const
01241 {
01242     return m_size/m_int_width;
01243 }
01244 
01245 template<uint8_t fixedIntWidth, class size_type_class>
01246 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::max_size()
01247 {
01248     return ((size_type)1)<<(sizeof(size_type)*8-6);// TODO: motivation for this expression
01249 }
01250 
01251 template<uint8_t fixedIntWidth, class size_type_class>
01252 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::bit_size() const
01253 {
01254     return m_size;
01255 }
01256 
01257 template<uint8_t fixedIntWidth, class size_type_class>
01258 bool int_vector<fixedIntWidth,size_type_class>::empty() const
01259 {
01260     return m_size==0;
01261 }
01262 
01263 template<uint8_t fixedIntWidth, class size_type_class>
01264 inline typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::capacity() const
01265 {
01266     return ((m_size+63)>>6)<<6;
01267 }
01268 
01269 template<uint8_t fixedIntWidth, class size_type_class>
01270 const uint8_t int_vector<fixedIntWidth,size_type_class>::get_int_width()const
01271 {
01272     return m_int_width;
01273 }
01274 
01275 template<uint8_t fixedIntWidth, class size_type_class>
01276 void int_vector<fixedIntWidth,size_type_class>::set_int_width(uint8_t width)
01277 {
01278     int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, width); // delegate to trait function
01279 }
01280 
01281 template<uint8_t fixedIntWidth, class size_type_class>
01282 inline typename int_vector<fixedIntWidth,size_type_class>::reference int_vector<fixedIntWidth,size_type_class>::operator[](const size_type& idx)
01283 {
01284     size_type i = idx * m_int_width;
01285     return reference(this->m_data + (i>>6), i&0x3F, m_int_width);
01286 }
01287 
01288 // specialized [] operator for 64 bit access.
01289 template<>
01290 inline int_vector<64>::reference int_vector<64>::operator[](const size_type& idx)
01291 {
01292     return *(this->m_data+idx);
01293 }
01294 
01295 // specialized [] operator for 32 bit access.
01296 template<>
01297 inline int_vector<32>::reference int_vector<32>::operator[](const size_type& idx)
01298 {
01299     return *(((uint32_t*)(this->m_data))+idx);
01300 }
01301 
01302 // specialized [] operator for 16 bit access.
01303 template<>
01304 inline int_vector<16>::reference int_vector<16>::operator[](const size_type& idx)
01305 {
01306     return *(((uint16_t*)(this->m_data))+idx);
01307 }
01308 
01309 // specialized [] operator for 8 bit access.
01310 template<>
01311 inline int_vector<8>::reference int_vector<8>::operator[](const size_type& idx)
01312 {
01313     return *(((uint8_t*)(this->m_data))+idx);
01314 }
01315 
01316 template<uint8_t fixedIntWidth, class size_type_class>
01317 inline typename int_vector<fixedIntWidth,size_type_class>::const_reference int_vector<fixedIntWidth,size_type_class>::operator[](const size_type& idx)const
01318 {
01319     return get_int(idx * fixedIntWidth, fixedIntWidth);
01320 }
01321 
01322 template<>
01323 inline int_vector<0>::const_reference int_vector<0>::operator[](const size_type& idx)const
01324 {
01325     return get_int(idx * m_int_width, m_int_width);
01326 }
01327 
01328 template<>
01329 inline int_vector<64>::const_reference int_vector<64>::operator[](const size_type& idx)const
01330 {
01331     return *(this->m_data+idx);
01332 }
01333 
01334 template<>
01335 inline int_vector<32>::const_reference int_vector<32>::operator[](const size_type& idx)const
01336 {
01337     return *(((uint32_t*)this->m_data)+idx);
01338 }
01339 
01340 template<>
01341 inline int_vector<16>::const_reference int_vector<16>::operator[](const size_type& idx)const
01342 {
01343     return *(((uint16_t*)this->m_data)+idx);
01344 }
01345 
01346 template<>
01347 inline int_vector<8>::const_reference int_vector<8>::operator[](const size_type& idx)const
01348 {
01349     return *(((uint8_t*)this->m_data)+idx);
01350 }
01351 
01352 template<>
01353 inline int_vector<1>::const_reference int_vector<1>::operator[](const size_type& idx)const
01354 {
01355     return ((*(m_data+(idx>>6)))>>(idx&0x3F))&1;
01356 }
01357 
01358 template<uint8_t fixedIntWidth, class size_type_class>
01359 inline const typename int_vector<fixedIntWidth,size_type_class>::iterator int_vector<fixedIntWidth,size_type_class>::begin()
01360 {
01361 //      return iterator(this, 0);
01362     return int_vector_trait<fixedIntWidth,size_type_class>::begin(this, m_data);
01363 }
01364 
01365 template<uint8_t fixedIntWidth, class size_type_class>
01366 inline const typename int_vector<fixedIntWidth,size_type_class>::iterator int_vector<fixedIntWidth,size_type_class>::end()
01367 {
01368 //      return iterator(this, m_int_width*(m_size/m_int_width));
01369     return int_vector_trait<fixedIntWidth,size_type_class>::end(this, m_data, (m_size/m_int_width));
01370 }
01371 
01372 template<uint8_t fixedIntWidth, class size_type_class>
01373 inline const typename int_vector<fixedIntWidth,size_type_class>::const_iterator int_vector<fixedIntWidth,size_type_class>::begin()const
01374 {
01375 //      return const_iterator(this, 0);
01376     return int_vector_trait<fixedIntWidth,size_type_class>::begin(this, m_data);
01377 }
01378 
01379 template<uint8_t fixedIntWidth, class size_type_class>
01380 inline const typename int_vector<fixedIntWidth,size_type_class>::const_iterator int_vector<fixedIntWidth,size_type_class>::end()const
01381 {
01382 //      return const_iterator(this, m_int_width*(m_size/m_int_width));
01383     return int_vector_trait<fixedIntWidth,size_type_class>::end(this, m_data, (m_size/m_int_width));
01384 }
01385 
01386 template<uint8_t fixedIntWidth, class size_type_class>
01387 bool int_vector<fixedIntWidth,size_type_class>::operator==(const int_vector<fixedIntWidth>& v)const
01388 {
01389     if (capacity() != v.capacity())
01390         return false;
01391     if (bit_size() != v.bit_size())
01392         return false;
01393     if (empty())
01394         return true;
01395     const uint64_t* data1 = v.data();
01396     const uint64_t* data2 = data();
01397     for (size_type i=0; i < (capacity()>>6)-1; ++i) {
01398         if (*(data1++) != *(data2++))
01399             return false;
01400     }
01401     int8_t l = 64-(capacity()-bit_size());
01402     return ((*data1)&bit_magic::Li1Mask[l])==((*data2)&bit_magic::Li1Mask[l]);
01403 }
01404 
01405 template<uint8_t fixedIntWidth, class size_type_class>
01406 bool int_vector<fixedIntWidth,size_type_class>::operator<(const int_vector& v)const
01407 {
01408     size_type min_size = size();
01409     if (min_size > v.size())
01410         min_size = v.size();
01411     for (typename int_vector<fixedIntWidth,size_type_class>::const_iterator it = begin(), end = begin()+min_size, it_v = v.begin(); it!=end; ++it, ++it_v) {
01412         if (*it == *it_v)
01413             continue;
01414         else
01415             return *it < *it_v;
01416     }
01417     return  size() < v.size();
01418 }
01419 
01420 template<uint8_t fixedIntWidth, class size_type_class>
01421 bool int_vector<fixedIntWidth,size_type_class>::operator>(const int_vector& v)const
01422 {
01423     size_type min_size = size();
01424     if (min_size > v.size())
01425         min_size = v.size();
01426     for (typename int_vector<fixedIntWidth,size_type_class>::const_iterator it = begin(), end = begin()+min_size, it_v = v.begin(); it!=end; ++it, ++it_v) {
01427         if (*it == *it_v)
01428             continue;
01429         else
01430             return *it > *it_v;
01431     }
01432     return  size() > v.size();
01433 }
01434 
01435 template<uint8_t fixedIntWidth, class size_type_class>
01436 bool int_vector<fixedIntWidth,size_type_class>::operator<=(const int_vector& v)const
01437 {
01438     return *this==v or *this<v;
01439 }
01440 
01441 template<uint8_t fixedIntWidth, class size_type_class>
01442 bool int_vector<fixedIntWidth,size_type_class>::operator>=(const int_vector& v)const
01443 {
01444     return *this==v or *this>v;
01445 }
01446 
01447 template<uint8_t fixedIntWidth, class size_type_class>
01448 bool int_vector<fixedIntWidth,size_type_class>::operator!=(const int_vector& v)const
01449 {
01450     return !(*this==v);
01451 }
01452 
01453 template<class size_type_class>
01454 size_type_class _sdsl_serialize_size_and_int_width(std::ostream& out, uint8_t fixed_int_width, uint8_t int_width, size_type_class size)
01455 {
01456     size_type_class written_bytes = 0;
01457     out.write((char*) &size, sizeof(size));
01458     written_bytes += sizeof(size);
01459     if (fixed_int_width == 0) {
01460         out.write((char*) &int_width, sizeof(int_width));
01461         written_bytes += sizeof(int_width);
01462     }
01463     return written_bytes;
01464 }
01465 
01466 template<uint8_t fixedIntWidth, class size_type_class>
01467 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::serialize(std::ostream& out,
01468         structure_tree_node* v,
01469         string name,
01470         bool write_fixed_as_variable) const
01471 {
01472     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
01473     size_type written_bytes = 0;
01474     if (fixedIntWidth > 0 and write_fixed_as_variable) {
01475         written_bytes += _sdsl_serialize_size_and_int_width(out, 0, fixedIntWidth, m_size);
01476     } else {
01477         written_bytes += _sdsl_serialize_size_and_int_width(out, fixedIntWidth, m_int_width, m_size);
01478     }
01479     uint64_t* p = m_data;
01480     const static size_type SDSL_BLOCK_SIZE = (1<<28);
01481     size_type idx = 0;
01482     while (idx+SDSL_BLOCK_SIZE < (capacity()>>6)) {
01483         out.write((char*) p, SDSL_BLOCK_SIZE*sizeof(uint64_t));
01484         written_bytes += SDSL_BLOCK_SIZE*sizeof(uint64_t);
01485         p       += SDSL_BLOCK_SIZE;
01486         idx     += SDSL_BLOCK_SIZE;
01487     }
01488     out.write((char*) p, ((capacity()>>6)-idx)*sizeof(uint64_t));
01489     written_bytes += ((capacity()>>6)-idx)*sizeof(uint64_t);
01490     structure_tree::add_size(child, written_bytes);
01491     return written_bytes;
01492 }
01493 
01494 template<uint8_t fixedIntWidth, class size_type_class>
01495 void int_vector<fixedIntWidth,size_type_class>::load(std::istream& in)
01496 {
01497     size_type size;
01498     int_vector_trait<fixedIntWidth, size_type_class>::read_header(size, m_int_width, in);
01499 
01500 //      util::read_member(size, in);
01501 //      if( fixedIntWidth == 0 ){
01502 //              in.read((char *) &intWidth, sizeof(m_int_width));
01503 //      }
01504 //      else
01505 //              intWidth = fixedIntWidth;
01506 //      set_int_width(intWidth);
01507 
01508     bit_resize(size);
01509     uint64_t* p = m_data;
01510     const static size_type SDSL_BLOCK_SIZE = (1<<28); // TODO: is this the loading bottleneck?
01511     size_type idx = 0;
01512     while (idx+SDSL_BLOCK_SIZE < (capacity()>>6)) {
01513         in.read((char*) p, SDSL_BLOCK_SIZE*sizeof(uint64_t));
01514         p       += SDSL_BLOCK_SIZE;
01515         idx += SDSL_BLOCK_SIZE;
01516     }
01517     in.read((char*) p, ((capacity()>>6)-idx)*sizeof(uint64_t));
01518 }
01519 
01521 template<class size_type_class>
01522 class char_array_serialize_wrapper
01523 {
01524     public:
01525         typedef size_type_class                 size_type;
01526     private:
01527 
01528         size_type m_n;  // number of char
01529         const unsigned char* m_cp;
01530 
01531     public:
01532 
01533         char_array_serialize_wrapper(const unsigned char* c=NULL, size_type n=0):m_n(n), m_cp(c) {}
01534 
01535 //      char_array_serialize_wrapper(const char* c=NULL, size_type n=0):m_n(n), m_cp(c){}
01536 
01537         size_type serialize(std::ostream& out) const {
01538             size_type size = m_n*8; // number of bits
01539             size_type written_bytes = 0;
01540             written_bytes += _sdsl_serialize_size_and_int_width(out, 8, 8, size);
01541             const char* p = (const char*)m_cp;
01542             const static size_type SDSL_BLOCK_SIZE = (1<<22);
01543             size_type idx = 0;
01544             while (idx+SDSL_BLOCK_SIZE < m_n) {
01545                 out.write(p, SDSL_BLOCK_SIZE);
01546                 written_bytes += SDSL_BLOCK_SIZE;
01547                 p += SDSL_BLOCK_SIZE;
01548                 idx += SDSL_BLOCK_SIZE;
01549             }
01550             // now: m_n-idx <= SDSL_BLOCK_SIZE
01551             out.write(p , m_n-idx);
01552             written_bytes += m_n-idx;
01553             uint32_t r= (sizeof(uint64_t)-(m_n % sizeof(uint64_t))) % sizeof(uint64_t);
01554             out.write("\0\0\0\0\0\0\0\0", r);
01555             written_bytes += r;
01556             return written_bytes;
01557         }
01558 
01559 };
01560 
01562 template<uint8_t fixedIntWidth, class size_type_class>
01563 class int_vector_file_buffer
01564 {
01565     public:
01566         typedef typename int_vector<fixedIntWidth, size_type_class>::size_type                  size_type;
01567         typedef typename int_vector<fixedIntWidth, size_type_class>::value_type                 value_type;
01568         typedef typename int_vector<fixedIntWidth, size_type_class>::const_reference    const_reference;
01569         typedef typename int_vector<fixedIntWidth, size_type_class>::reference                  reference;
01570         typedef typename int_vector<fixedIntWidth, size_type_class>::int_width_type             int_width_type;
01571 
01572     private:
01573 
01574         std::fstream m_in;
01575         uint64_t* m_buf;
01576         size_type m_off; // offset in the first 64bit word of the buffer
01577         size_type m_read_values; // number of values read in the last buffer operation
01578         size_type m_len;
01579         size_type m_int_vector_size;
01580         size_type m_read_values_sum;
01581         int_width_type   m_int_width;
01582         std::string m_file_name;
01583         bool    m_load_from_plain;
01584 
01585         void load_size_and_width() {
01586             int_vector_trait<fixedIntWidth, size_type_class>::read_header(m_int_vector_size, m_int_width, m_in);
01587             m_int_vector_size/=m_int_width;
01588         }
01589 
01590         size_type words_to_read(size_type len) {
01591             if (len == 0)
01592                 return 0;
01593             size_type last_off = (m_read_values_sum*m_int_width)%64;
01594             assert(((64-last_off)%64) < m_int_width);
01595             return ((len*m_int_width - ((64-last_off)%64))+63)/64;
01596         }
01597 
01598         void init() {
01599             m_int_vector_size   = 0;
01600             m_off                               = 0;
01601             m_read_values               = 0;
01602             m_read_values_sum   = 0;
01603         }
01604 
01605     public:
01606 
01607         const size_type& int_vector_size;
01608         const uint8_t& int_width;
01609         const std::string& file_name;
01610 
01612         /*
01613          * \param f_file_name   File which contains the int_vector.
01614          * \param len                   Length of the buffer in elements.
01615          */
01616         int_vector_file_buffer(const char* f_file_name=NULL, size_type len=1000000, uint8_t int_width=0):m_in(), m_buf(NULL), m_off(0), m_read_values(0), m_len(0), m_int_vector_size(0), m_read_values_sum(0), m_int_width(fixedIntWidth), m_file_name(), m_load_from_plain(false), int_vector_size(m_int_vector_size), int_width(m_int_width), file_name(m_file_name) {
01617             m_load_from_plain = false;
01618             int_vector_trait<fixedIntWidth, size_type_class>::set_int_width(m_int_width, int_width);
01619             m_len                               = len;
01620             init();
01621             if (f_file_name == NULL) {
01622                 return;
01623             }
01624             m_file_name = f_file_name;
01625             m_in.open(m_file_name.c_str(), std::ifstream::in);
01626             if (m_in.is_open()) {
01627                 load_size_and_width();
01628                 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2];
01629             } else {
01630                 m_buf = NULL;
01631             }
01632         }
01633 
01634         // initialize int_vector_file_buffer from a plain file
01635         // works only for fixedIntWidth = 8 // TODO extent to 1,16,32,64
01636         bool load_from_plain(const char* f_file_name, size_type len=1000000, uint8_t int_width=0) {
01637             if (f_file_name == NULL) {
01638                 std::logic_error("ERROR: int_vector_file_buffer::load_from_plain expects a file name.");
01639                 return false;
01640             }
01641             if (fixedIntWidth != 8) {
01642                 std::logic_error("ERROR: int_vector_file_buffer: load_from_plain is only implemented for fixedIntWidth=8.");
01643                 return false;
01644             }
01645             m_load_from_plain = true;
01646             int_vector_trait<fixedIntWidth, size_type_class>::set_int_width(m_int_width, int_width);
01647             m_len = len;
01648             m_file_name = f_file_name;
01649             m_int_vector_size = get_file_size(m_file_name.c_str());
01650             m_in.open(m_file_name.c_str(), std::ifstream::in);
01651             if (m_in.is_open()) {
01652                 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2];
01653             } else {
01654                 m_buf = NULL;
01655                 return false;
01656             }
01657             return true;
01658         }
01659 
01660         bool reset(size_type new_buf_len=0) {
01661             m_in.clear();
01662             if (!m_in.seekg(0, std::ios::beg)) {
01663                 throw std::ios_base::failure("int_vector_file_buffer: reset()");
01664                 return false;
01665             };
01666             init();
01667             if (!m_load_from_plain) {
01668                 load_size_and_width();
01669             } else {
01670                 m_int_vector_size = get_file_size(m_file_name.c_str());
01671             }
01672             if (new_buf_len > 0 and new_buf_len != m_len) {
01673                 if (m_buf != NULL)
01674                     delete [] m_buf;
01675                 m_len = new_buf_len;
01676                 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2];
01677             }
01678             return true;
01679         }
01680 
01681         bool valid() {
01682             return m_buf!=NULL;
01683         }
01684 
01685         size_type load_next_block() {
01686             if (m_read_values_sum == m_int_vector_size) {
01687                 return 0;
01688             }
01689             assert(m_read_values_sum < m_int_vector_size);
01690             size_type values_to_read    = m_len;
01691             if (values_to_read + m_read_values_sum > m_int_vector_size) {
01692                 values_to_read = m_int_vector_size - m_read_values_sum;
01693             }
01694             if (((m_read_values_sum*m_int_width)%64) == 0) { //if the new offset == 0
01695                 m_in.read((char*) m_buf, words_to_read(values_to_read) * sizeof(uint64_t));
01696                 m_off = 0;
01697             } else { // the new offset != 0
01698                 m_buf[0] = m_buf[(m_read_values*m_int_width+m_off)/64 ];
01699                 m_in.read((char*)(m_buf+1), words_to_read(values_to_read) * sizeof(uint64_t));
01700                 m_off = (m_read_values_sum*m_int_width)%64;
01701             }
01702             m_read_values_sum   += values_to_read;
01703             m_read_values               =  values_to_read;
01704             return m_read_values;
01705         }
01706 
01707         /*
01708         void write_current_block(){
01709                 if( m_read_values > 0 ){ // if we there are some values in the buffer
01710 
01711                 }
01712         }
01713         */
01714 
01716         /* \pre \f$ i < len \f$
01717          */
01718         value_type operator[](const size_type i)const {
01719             assert(i<m_len);
01720             size_type idx = i*m_int_width+m_off;
01721             return bit_magic::read_int(m_buf + (idx>>6), idx&0x3F, m_int_width);
01722         }
01723 
01724         void set_int(const size_type i, uint64_t x) {
01725             size_type idx = i*m_int_width+m_off;
01726             bit_magic::write_int(m_buf + (idx>>6), x, idx&0x3F, m_int_width);
01727         }
01728 
01729         const uint64_t* data()const {
01730             return m_buf;
01731         }
01732         //TODO: proxy object for writting the int buffer
01733         /*
01734                 reference operator[](const size_type i){
01735                         assert(i<m_len);
01736                         size_type idx = i*m_int_width+m_off;
01737                         return reference(m_buf + (idx>>6), idx&0x3F, m_int_width);
01738                 }
01739         */
01740 
01741         ~int_vector_file_buffer() {
01742             if (m_in.is_open()) {
01743                 m_in.close(); // close ifstream
01744                 delete [] m_buf;
01745             }
01746         }
01747 };
01748 
01749 /*
01750 // specialized [] operator for 8 bit access.
01751 template<>
01752 inline int_vector_file_buffer<8, uint64_t>::reference int_vector_file_buffer<8, uint64_t>::operator[](const size_type idx){
01753         assert(idx<m_len);
01754         return *(((uint8_t*)(m_buf))+idx);
01755 }
01756 
01757 
01758 // specialized [] operator for 8 bit access.
01759 template<>
01760 inline int_vector_file_buffer<8, uint32_t>::reference int_vector_file_buffer<8, uint32_t>::operator[](const size_type idx){
01761         assert(idx<m_len);
01762         return *(((uint8_t*)(m_buf))+idx);
01763 }
01764 */
01765 
01766 }// end namespace sdsl
01767 
01768 #endif // end file