SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_INT_VECTOR 00022 #define INCLUDED_SDSL_INT_VECTOR 00023 00024 #include "compatibility.hpp" 00025 #include "bitmagic.hpp" 00026 #include "util.hpp" 00027 #include "testutils.hpp" 00028 #include "uintx_t.hpp" 00029 #include "structure_tree.hpp" 00030 #include <iosfwd> // forward declaration of ostream 00031 #include <stdexcept> // for exceptions 00032 #include <iostream> // for cerr 00033 #include <typeinfo> 00034 #include <cassert> 00035 #include <iterator> 00036 #include <cstdlib> 00037 #include <cstddef> 00038 #include <ctime> // for rand initialization 00039 #include <cstring> // for memcpy 00040 #include <ostream> 00041 #include <istream> 00042 #include <string> 00043 00044 00045 00047 namespace sdsl 00048 { 00049 00050 typedef uint64_t std_size_type_for_int_vector; 00051 00052 template<uint8_t fixedIntWidth=0, class size_type_class = std_size_type_for_int_vector> 00053 class int_vector; // forward declaration 00054 00055 00056 namespace algorithm 00057 { 00058 template<uint8_t fixedIntWidth> 00059 static void calculate_sa(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa); 00060 } 00061 00062 00064 typedef int_vector<1> bit_vector; 00065 00066 template<class int_vector> 00067 class int_vector_reference; 00068 00069 template<class int_vector> 00070 class int_vector_iterator_base; // forward declaration 00071 00072 template<class int_vector> 00073 class int_vector_iterator; // forward declaration 00074 00075 template<class int_vector> 00076 class int_vector_const_iterator; // forward declaration 00077 00078 template<uint8_t b, uint8_t patter_len> // forward declaration 00079 class rank_support_v; 00080 00081 class rank_support_jmc; 00082 template<uint8_t bit_pattern, uint8_t pattern_len> 00083 class rank_support_v; 00084 class rank_support; 00085 00086 class select_support; 00087 00088 template<class RankSupport> 00089 class select_support_bs; 00090 00091 template<uint8_t bit_pattern, uint8_t pattern_len> // forward declaration 00092 class select_support_mcl; 00093 00094 namespace coder 00095 { 00096 class fibonacci; 00097 class elias_delta; 00098 class ternary; 00099 } 00100 00101 template<uint8_t=0, class size_type_class = std_size_type_for_int_vector> 00102 class int_vector_file_buffer; 00103 00104 template<class size_type_class = std_size_type_for_int_vector> 00105 class char_array_serialize_wrapper; 00106 00107 00108 template<uint8_t fixedIntWidth, class size_type_class> 00109 struct int_vector_trait { 00110 typedef uint64_t value_type; 00111 typedef size_type_class size_type; 00112 typedef int_vector<fixedIntWidth, size_type_class> int_vector_type; 00113 typedef int_vector_reference<int_vector_type> reference; 00114 typedef const uint64_t const_reference; 00115 typedef uint8_t int_width_type; 00116 typedef int_vector_iterator<int_vector_type> iterator; 00117 typedef int_vector_const_iterator<int_vector_type> const_iterator; 00118 // Sets int_width to new_int_width 00119 static void set_int_width(int_width_type& int_width, const uint8_t new_int_width) { 00120 if (fixedIntWidth==1) 00121 return; 00122 if (new_int_width>0 and new_int_width<=64) 00123 int_width = new_int_width; 00124 else 00125 int_width = 64; 00126 } 00127 00128 // read the size and int_width of a bit_vector 00129 static void read_header(size_type& size, int_width_type& int_width, std::istream& in) { 00130 util::read_member(size, in); 00131 if (0 == fixedIntWidth) { 00132 util::read_member(int_width, in); 00133 set_int_width(int_width, int_width); 00134 } 00135 } 00136 00137 static iterator begin(int_vector_type* v, uint64_t*) { 00138 return iterator(v, 0); 00139 } 00140 static iterator end(int_vector_type* v, uint64_t*, size_type) { 00141 return iterator(v, v->size()*v->get_int_width()) ; 00142 } 00143 static const_iterator begin(const int_vector_type* v, const uint64_t*) { 00144 return const_iterator(v, 0); 00145 } 00146 static const_iterator end(const int_vector_type* v, const uint64_t*, size_type) { 00147 return const_iterator(v, v->size()*v->get_int_width()); 00148 } 00149 }; 00150 00151 template<class size_type_class> 00152 struct int_vector_trait<64, size_type_class> { 00153 typedef uint64_t value_type; 00154 typedef size_type_class size_type; 00155 typedef int_vector<64, size_type_class> int_vector_type; 00156 typedef uint64_t& reference; 00157 typedef const uint64_t const_reference; 00158 typedef const uint8_t int_width_type; 00159 typedef uint64_t* iterator; 00160 typedef const uint64_t* const_iterator; 00161 00162 00163 static void set_int_width(int_width_type&, const uint8_t) {} 00164 00165 // read the size and int_width of a bit_vector 00166 static void read_header(size_type& size, int_width_type&, std::istream& in) { 00167 util::read_member(size, in); 00168 } 00169 00170 static iterator begin(int_vector_type*, uint64_t* begin) { 00171 return begin; 00172 } 00173 static iterator end(int_vector_type*, uint64_t* begin, size_type size) { 00174 return begin+size; 00175 } 00176 static const_iterator begin(const int_vector_type*, const uint64_t* begin) { 00177 return begin; 00178 } 00179 static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) { 00180 return begin+size; 00181 } 00182 }; 00183 00184 template<class size_type_class> 00185 struct int_vector_trait<32, size_type_class> { 00186 typedef uint32_t value_type; 00187 typedef size_type_class size_type; 00188 typedef int_vector<32, size_type_class> int_vector_type; 00189 typedef uint32_t& reference; 00190 typedef const uint32_t const_reference; 00191 typedef const uint8_t int_width_type; 00192 typedef uint32_t* iterator; 00193 typedef const uint32_t* const_iterator; 00194 static void set_int_width(int_width_type&, const uint8_t) {} 00195 00196 // read the size and int_width of a bit_vector 00197 static void read_header(size_type& size, int_width_type&, std::istream& in) { 00198 util::read_member(size, in); 00199 } 00200 00201 static iterator begin(int_vector_type*, uint64_t* begin) { 00202 return (uint32_t*)begin; 00203 } 00204 static iterator end(int_vector_type*, uint64_t* begin, size_type size) { 00205 return ((uint32_t*)begin)+size; 00206 } 00207 static const_iterator begin(const int_vector_type*, const uint64_t* begin) { 00208 return (uint32_t*)begin; 00209 } 00210 static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) { 00211 return ((uint32_t*)begin)+size; 00212 } 00213 }; 00214 00215 template<class size_type_class> 00216 struct int_vector_trait<16, size_type_class> { 00217 typedef uint16_t value_type; 00218 typedef size_type_class size_type; 00219 typedef int_vector<16, size_type_class> int_vector_type; 00220 typedef uint16_t& reference; 00221 typedef const uint16_t const_reference; 00222 typedef const uint8_t int_width_type; 00223 typedef uint16_t* iterator; 00224 typedef const uint16_t* const_iterator; 00225 static void set_int_width(int_width_type&, const uint8_t) {} 00226 00227 // read the size and int_width of a bit_vector 00228 static void read_header(size_type& size, int_width_type&, std::istream& in) { 00229 util::read_member(size, in); 00230 } 00231 00232 static iterator begin(int_vector_type*, uint64_t* begin) { 00233 return (uint16_t*)begin; 00234 } 00235 static iterator end(int_vector_type*, uint64_t* begin, size_type size) { 00236 return ((uint16_t*)begin)+size; 00237 } 00238 static const_iterator begin(const int_vector_type*, const uint64_t* begin) { 00239 return (uint16_t*)begin; 00240 } 00241 static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) { 00242 return ((uint16_t*)begin)+size; 00243 } 00244 }; 00245 00246 template<class size_type_class> 00247 struct int_vector_trait<8, size_type_class> { 00248 typedef uint8_t value_type; 00249 typedef size_type_class size_type; 00250 typedef int_vector<8, size_type_class> int_vector_type; 00251 typedef uint8_t& reference; 00252 typedef const uint8_t const_reference; 00253 typedef const uint8_t int_width_type; 00254 typedef uint8_t* iterator; 00255 typedef const uint8_t* const_iterator; 00256 static void set_int_width(int_width_type&, const uint8_t) {} 00257 00258 // read the size and int_width of a bit_vector 00259 static void read_header(size_type& size, int_width_type&, std::istream& in) { 00260 util::read_member(size, in); 00261 } 00262 00263 static iterator begin(int_vector_type*, uint64_t* begin) { 00264 return (uint8_t*)begin; 00265 } 00266 static iterator end(int_vector_type*, uint64_t* begin, size_type size) { 00267 return ((uint8_t*)begin)+size; 00268 } 00269 static const_iterator begin(const int_vector_type*, const uint64_t* begin) { 00270 return (uint8_t*)begin; 00271 } 00272 static const_iterator end(const int_vector_type*, const uint64_t* begin, size_type size) { 00273 return ((uint8_t*)begin)+size; 00274 } 00275 }; 00276 00278 00303 template<uint8_t fixedIntWidth, class size_type_class> 00304 class int_vector 00305 { 00306 public: 00307 00308 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::value_type value_type; // STL Container requirement 00309 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::iterator iterator; // STL Container requirement 00310 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::const_iterator const_iterator; 00311 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::reference reference; 00312 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::const_reference const_reference; 00313 typedef int_vector_reference<int_vector>* pointer; // TODO: fix this for 63,32,16,8 bit vector 00314 typedef const value_type* const_pointer; 00315 typedef ptrdiff_t difference_type;// STL Container requirement 00316 typedef size_type_class size_type; // STL Container requirement 00317 typedef typename int_vector_trait<fixedIntWidth,size_type_class>::int_width_type int_width_type; 00318 typedef rank_support_v<1,1> rank_1_type; 00319 typedef rank_support_v<0,1> rank_0_type; 00320 typedef select_support_mcl<1,1> select_1_type; 00321 typedef select_support_mcl<0,1> select_0_type; 00322 00323 friend struct int_vector_trait<fixedIntWidth, size_type_class>; 00324 friend class int_vector_iterator_base<int_vector>; 00325 friend class int_vector_iterator<int_vector>; 00326 friend class int_vector_const_iterator<int_vector>; 00327 friend class coder::elias_delta; 00328 friend class coder::fibonacci; 00329 friend class coder::ternary; 00330 friend class int_vector_file_buffer<fixedIntWidth, size_type_class>; 00332 friend std::istream& operator>>(std::istream&, int_vector<1>&); 00333 friend void util::set_random_bits<int_vector>(int_vector& v, int); 00334 friend void util::set_zero_bits<int_vector>(int_vector& v); 00335 friend void util::set_one_bits<int_vector>(int_vector& v); 00336 friend void util::bit_compress<int_vector>(int_vector& v); 00337 friend void util::set_all_values_to_k<int_vector>(int_vector& v, uint64_t k); 00338 friend void algorithm::calculate_sa<fixedIntWidth>(const unsigned char* c, typename int_vector<fixedIntWidth>::size_type len, int_vector<fixedIntWidth>& sa); 00339 private: 00340 size_type m_size; 00341 uint64_t* m_data; 00342 int_width_type m_int_width; 00343 public: 00344 00346 00351 int_vector(size_type elements = 0, value_type default_value = 0, uint8_t intWidth = fixedIntWidth); 00352 00354 00357 int_vector(const int_vector& v); 00358 00360 ~int_vector(); 00361 00363 00366 bool empty() const; 00367 00369 /* The swap method can be defined in terms of assignment. 00370 This requires three assignments, each of which, for a container type, is linear 00371 in the container's size. In a sense, then, a.swap(b) is redundant. 00372 This implementation guaranties a run-time complexity that is constant rather than linear. 00373 \params v int_vector to swap. 00374 Required for the Assignable Conecpt of the STL. 00375 */ 00376 void swap(int_vector& v); 00377 00379 00383 void resize(const size_type size); 00384 00386 00388 void bit_resize(const size_type size); 00389 00391 00395 size_type size() const; 00396 00398 00402 static size_type max_size(); 00403 00405 00408 size_type bit_size() const; 00409 00411 00415 size_type capacity() const; 00416 00418 00421 const uint64_t* data() const { 00422 return m_data; 00423 } 00424 00426 00431 const value_type get_int(size_type idx, const uint8_t len=64) const; 00432 00434 00441 void set_int(size_type idx, value_type x, const uint8_t len=64); 00442 00444 00447 const uint8_t get_int_width() const; 00448 00450 00454 void set_int_width(uint8_t intWidth); 00455 00457 00461 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name = "", bool write_fixed_as_variable=false) const; 00462 00464 void load(std::istream& in); 00465 00467 00472 inline reference operator[](const size_type& i); 00473 00475 00480 inline const_reference operator[](const size_type& i) const; 00481 00482 // ! This operator should be used for displaying a specific range of the int_vector . 00483 /* ! \param range This const char array should consists of an integer <b>begin_idx</b> followed by two dots (..) followed by an integer <b>end_idx</b>. E.g. "0..10" or "3..7". 00484 \return A string containing the bit representation from index <b>begin_idx</b> to (inclusive!) <b>end_idx</b>. 00485 */ 00486 // std::string operator[](const char* range) const; 00487 00489 00494 int_vector& operator=(const int_vector& v); 00495 00496 00498 00506 bool operator==(const int_vector<fixedIntWidth>& v) const; 00507 00509 00519 bool operator!=(const int_vector& v) const; 00520 00522 00529 bool operator<(const int_vector& v) const; //TODO: Test 00530 00532 00539 bool operator>(const int_vector& v) const;//TODO: Test 00541 00545 bool operator<=(const int_vector& v) const;//TODO: Test 00547 00551 bool operator>=(const int_vector& v) const;//TODO: Test 00552 00554 00557 const iterator begin(); 00558 00560 00563 const iterator end(); 00564 00566 00568 const const_iterator begin() const; 00569 00571 00573 const const_iterator end() const; 00574 00575 //TODO: rbegin() 00576 //TODO: rend() 00577 00578 private: 00580 /* \param i Position of the bit to set to value b. 00581 \param b Value to set the bit at position i. 00582 \sa getBit 00583 */ 00584 void setBit(size_type i,const bool b=true); 00585 00587 /* \param i Position of the bit. 00588 \returns The boolean value of the bit at position i. 00589 \sa setBit 00590 */ 00591 const bool getBit(size_type i) const; 00592 00593 00594 }; 00606 00607 template<class int_vector> 00608 class int_vector_reference 00609 { 00610 private: 00611 typename int_vector::value_type* const m_word; 00612 const uint8_t m_offset; 00613 const uint8_t m_len; 00614 public: 00616 00620 int_vector_reference(typename int_vector::value_type* word, uint8_t offset, uint8_t len): 00621 m_word(word),m_offset(offset),m_len(len) {}; 00622 00624 00631 int_vector_reference& operator=(typename int_vector::value_type x) { 00632 bit_magic::write_int(m_word, x, m_offset, m_len); 00633 return *this; 00634 }; 00635 00636 int_vector_reference& operator=(const int_vector_reference& x) { 00637 return *this = typename int_vector::value_type(x); 00638 }; 00639 00641 operator typename int_vector::value_type()const { 00642 return bit_magic::read_int(m_word, m_offset, m_len); 00643 } 00644 00645 bool operator==(const int_vector_reference& x)const { 00646 return typename int_vector::value_type(*this) == typename int_vector::value_type(x); 00647 } 00648 00649 bool operator<(const int_vector_reference& x)const { 00650 return typename int_vector::value_type(*this) < typename int_vector::value_type(x); 00651 } 00652 }; 00653 00654 // specialization for int_vector_reference for int_vector == bit_vector 00655 // special thanks to Timo Beller, who pointed out that the specialization is missing 00656 template<> 00657 class int_vector_reference<bit_vector> 00658 { 00659 private: 00660 uint64_t* const m_word; 00661 uint64_t m_mask; // TODO make it const 00662 public: 00664 00667 int_vector_reference(uint64_t* word, uint8_t offset, uint8_t): 00668 m_word(word),m_mask(1ULL<<offset) {}; 00669 00671 00678 int_vector_reference& operator=(bool x) { 00679 if (x) 00680 *m_word |= m_mask; 00681 else 00682 *m_word &= ~m_mask; 00683 return *this; 00684 }; 00685 00686 int_vector_reference& operator=(const int_vector_reference& x) { 00687 return *this = bool(x); 00688 }; 00689 00691 operator bool()const { 00692 return !!(*m_word & m_mask); 00693 } 00694 00695 bool operator==(const int_vector_reference& x)const { 00696 return bool(*this) == bool(x); 00697 } 00698 00699 bool operator<(const int_vector_reference& x)const { 00700 return !bool(*this) && bool(x); 00701 } 00702 }; 00703 00704 00705 00706 template<class int_vector> 00707 class int_vector_iterator_base: public std::iterator<std::random_access_iterator_tag, typename int_vector::value_type, typename int_vector::difference_type/*ptrdiff_t*/> 00708 { 00709 public: 00710 typedef /*typename int_vector::size_type*/uint64_t size_type; 00711 protected: 00712 uint8_t m_offset; 00713 uint8_t m_len; 00714 00715 public: 00716 00717 int_vector_iterator_base(uint8_t offset, uint8_t len):m_offset(offset),m_len(len) 00718 {} 00719 00720 int_vector_iterator_base(const int_vector* v=NULL, size_type idx=0) { /*:m_offset(idx&0x3F), m_len(v->m_int_width)*/ 00721 m_offset = idx&0x3F; 00722 if (v==NULL) 00723 m_len = 0; 00724 else 00725 m_len = v->m_int_width; 00726 } 00727 }; 00728 00729 template<class int_vector> 00730 class int_vector_iterator : public int_vector_iterator_base<int_vector> 00731 { 00732 public: 00733 typedef int_vector_reference<int_vector> reference; 00734 typedef int_vector_iterator iterator; 00735 typedef reference* pointer; 00736 typedef typename int_vector::size_type size_type; 00737 typedef typename int_vector::difference_type difference_type; 00738 00739 private: 00740 00741 using int_vector_iterator_base<int_vector>::m_offset; // make m_offset easy usable 00742 using int_vector_iterator_base<int_vector>::m_len; // make m_len easy usable 00743 00744 typename int_vector::value_type* m_word; 00745 00746 public: 00747 00748 inline int_vector_iterator(int_vector* v=NULL, size_type idx=0) : int_vector_iterator_base<int_vector>(v, idx) { 00749 if (v!=NULL) 00750 m_word = v->m_data + (idx>>6); 00751 else 00752 m_word = NULL; 00753 } 00754 00755 00756 inline int_vector_iterator(const int_vector_iterator<int_vector>& it) : int_vector_iterator_base<int_vector>(it.m_offset, it.m_len) { 00757 m_word = it.m_word; 00758 } 00759 00760 reference operator*() const { 00761 return reference(m_word, m_offset, m_len); 00762 } 00763 00765 iterator& operator++() { 00766 m_offset+=m_len; 00767 if (m_offset >= 64) { 00768 m_offset &= 0x3F; 00769 ++m_word; 00770 } 00771 return *this; 00772 } 00773 00775 iterator operator++(int x) { 00776 int_vector_iterator it = *this; 00777 ++(*this); 00778 return it; 00779 } 00780 00782 iterator& operator--() { 00783 m_offset-=m_len; 00784 if (m_offset >= 64) { 00785 m_offset &= 0x3F; 00786 --m_word; 00787 } 00788 return *this; 00789 } 00790 00792 iterator operator--(int x) { 00793 int_vector_iterator it = *this; 00794 --(*this); 00795 return it; 00796 } 00797 00798 iterator& operator+=(difference_type i) { 00799 if (i<0) 00800 return *this -= (-i); 00801 difference_type t = i*m_len; 00802 m_word += (t>>6); 00803 if ((m_offset+=(t&0x3F))&~0x3F) { // if new offset is >= 64 00804 ++m_word; // add one to the word 00805 m_offset&=0x3F; // offset = offset mod 64 00806 } 00807 return *this; 00808 } 00809 00810 iterator& operator-=(difference_type i) { 00811 if (i<0) 00812 return *this += (-i); 00813 difference_type t = i*m_len; 00814 m_word -= (t>>6); 00815 if ((m_offset-=(t&0x3F))&~0x3F) { // if new offset is < 0 00816 --m_word; 00817 m_offset&=0x3F; 00818 } 00819 return *this; 00820 } 00821 00822 iterator& operator=(const int_vector_iterator<int_vector>& it) { 00823 if (this != &it) { 00824 m_word = it.m_word; 00825 m_offset = it.m_offset; 00826 m_len = it.m_len; 00827 } 00828 return *this; 00829 } 00830 00831 iterator operator+(difference_type i) const { 00832 iterator it = *this; 00833 return it += i; 00834 } 00835 00836 iterator operator-(difference_type i) const { 00837 iterator it = *this; 00838 return it -= i; 00839 } 00840 00841 reference operator[](difference_type i) const { 00842 return *(*this + i); 00843 } 00844 00845 bool operator==(const int_vector_iterator& it)const { 00846 return it.m_word == m_word && it.m_offset == m_offset; 00847 } 00848 00849 bool operator!=(const int_vector_iterator& it)const { 00850 return !(*this==it); 00851 } 00852 00853 bool operator<(const int_vector_iterator& it)const { 00854 if (m_word == it.m_word) 00855 return m_offset < it.m_offset; 00856 return m_word < it.m_word; 00857 } 00858 00859 bool operator>(const int_vector_iterator& it)const { 00860 if (m_word == it.m_word) 00861 return m_offset > it.m_offset; 00862 return m_word > it.m_word; 00863 } 00864 00865 bool operator>=(const int_vector_iterator& it)const { 00866 return !(*this < it); 00867 } 00868 00869 bool operator<=(const int_vector_iterator& it)const { 00870 return !(*this > it); 00871 } 00872 inline difference_type operator-(const int_vector_iterator& it) { 00873 return (((m_word - it.m_word)<<6) + m_offset - it.m_offset) / m_len; 00874 } 00875 }; 00876 00877 template<class int_vector> 00878 inline int_vector_iterator<int_vector> operator+(typename int_vector_iterator<int_vector>::difference_type n, const int_vector_iterator<int_vector>& it) 00879 { 00880 return it+n; 00881 } 00882 00883 template<class int_vector> 00884 class int_vector_const_iterator : public int_vector_iterator_base<int_vector> 00885 { 00886 public: 00887 typedef typename int_vector::value_type const_reference; 00888 typedef const typename int_vector::value_type* pointer; 00889 typedef int_vector_const_iterator const_iterator; 00890 typedef typename int_vector::size_type size_type; 00891 typedef typename int_vector::difference_type difference_type; 00892 00893 private: 00894 00895 using int_vector_iterator_base<int_vector>::m_offset; // make m_offset easy usable 00896 using int_vector_iterator_base<int_vector>::m_len; // make m_len easy usable 00897 00898 const typename int_vector::value_type* m_word; 00899 00900 public: 00901 00902 int_vector_const_iterator(const int_vector* v=NULL, size_type idx=0) : int_vector_iterator_base<int_vector>(v, idx) { 00903 if (v!=NULL) 00904 m_word = v->m_data + (idx>>6); 00905 else 00906 m_word = NULL; 00907 } 00908 00909 int_vector_const_iterator(const int_vector_iterator<int_vector>& it):int_vector_iterator_base<int_vector>(it.m_offset, it.m_len),m_word(it.m_word) 00910 { } 00911 /* 00912 int_vector_const_iterator(const int_vector_const_iterator<int_vector> &it):int_vector_iterator_base<int_vector>(it.m_offset, it.m_len),m_word(it.m_word) 00913 { 00914 } 00915 */ 00916 00917 const_reference operator*() const { 00918 if (m_offset+m_len <= 64) { 00919 return ((*m_word)>>m_offset)&bit_magic::Li1Mask[m_len]; 00920 } else { 00921 return ((*m_word)>>m_offset) | ((*(m_word+1) & bit_magic::Li1Mask[(m_offset+m_len)&0x3F])<<(64-m_offset)); 00922 } 00923 } 00924 00926 const_iterator& operator++() { 00927 m_offset+=m_len; 00928 if (m_offset >= 64) { 00929 m_offset &= 0x3F; 00930 ++m_word; 00931 } 00932 return *this; 00933 } 00934 00936 const_iterator operator++(int x) { 00937 int_vector_const_iterator it = *this; 00938 ++(*this); 00939 return it; 00940 } 00941 00943 const_iterator& operator--() { 00944 m_offset-=m_len; 00945 if (m_offset >= 64) { 00946 m_offset &= 0x3F; 00947 --m_word; 00948 } 00949 return *this; 00950 } 00951 00953 const_iterator operator--(int x) { 00954 int_vector_const_iterator it = *this; 00955 --(*this); 00956 return it; 00957 } 00958 00959 const_iterator& operator+=(difference_type i) { 00960 if (i<0) 00961 return *this -= (-i); 00962 difference_type t = i*m_len; 00963 m_word += (t>>6); 00964 if ((m_offset+=(t&0x3F))&~0x3F) {// if new offset >= 64 00965 ++m_word; // add one to the word 00966 m_offset&=0x3F; // offset = offset mod 64 00967 } 00968 return *this; 00969 } 00970 00971 const_iterator& operator-=(difference_type i) { 00972 if (i<0) 00973 return *this += (-i); 00974 difference_type t = i*m_len; 00975 m_word -= (t>>6); 00976 if ((m_offset-=(t&0x3F))&~0x3F) {// if new offset is < 0 00977 --m_word; 00978 m_offset&=0x3F; 00979 } 00980 return *this; 00981 } 00982 00983 const_iterator operator+(difference_type i) const { 00984 const_iterator it = *this; 00985 return it += i; 00986 } 00987 00988 const_iterator operator-(difference_type i) const { 00989 const_iterator it = *this; 00990 return it -= i; 00991 } 00992 00993 const_reference operator[](difference_type i) const { 00994 return *(*this + i); 00995 } 00996 00997 bool operator==(const int_vector_const_iterator& it)const { 00998 return it.m_word == m_word && it.m_offset == m_offset; 00999 } 01000 01001 bool operator!=(const int_vector_const_iterator& it)const { 01002 return !(*this==it); 01003 } 01004 01005 bool operator<(const int_vector_const_iterator& it)const { 01006 if (m_word == it.m_word) 01007 return m_offset < it.m_offset; 01008 return m_word < it.m_word; 01009 } 01010 01011 bool operator>(const int_vector_const_iterator& it)const { 01012 if (m_word == it.m_word) 01013 return m_offset > it.m_offset; 01014 return m_word > it.m_word; 01015 } 01016 01017 bool operator>=(const int_vector_const_iterator& it)const { 01018 return !(*this < it); 01019 } 01020 01021 bool operator<=(const int_vector_const_iterator& it)const { 01022 return !(*this > it); 01023 } 01024 01025 }; 01026 01027 template<class int_vector> 01028 inline typename int_vector_const_iterator<int_vector>::difference_type operator-(const int_vector_const_iterator<int_vector>& x, const int_vector_const_iterator<int_vector>& y) 01029 { 01030 return (((x.m_word - y.m_word)<<6) + x.m_offset - y.m_offset) / x.m_len; 01031 } 01032 01033 template<class int_vector> 01034 inline int_vector_const_iterator<int_vector> operator+(typename int_vector_const_iterator<int_vector>::difference_type n, const int_vector_const_iterator<int_vector>& it) 01035 { 01036 return it + n; 01037 } 01038 01039 //std::ostream& operator<<(std::ostream&, const int_vector<1> &); 01040 01041 inline std::ostream& operator<<(std::ostream& os, const int_vector<1>& v) 01042 { 01043 for (int_vector<1>::const_iterator it=v.begin(), end = v.end(); it != end; ++it) { 01044 os << *it; 01045 } 01046 os << std::endl; 01047 return os; 01048 } 01049 01050 inline std::ostream& operator<<(std::ostream& os, const int_vector<0>& v) 01051 { 01052 for (int_vector<0>::const_iterator it=v.begin(), end = v.end(); it != end; ++it) { 01053 os << *it; 01054 if (it+1 != end) os << " "; 01055 } 01056 os << std::endl; 01057 return os; 01058 } 01059 01060 01061 /* 01062 inline std::istream& operator>>(std::istream &in, int_vector<1> &v){ 01063 char c; 01064 v.resize(0); // clear 01065 do{ 01066 c = in.get(); 01067 if(c=='0' || c=='1'){ 01068 v.bit_resize(v.bit_size()+1); 01069 v.setBit(v.bit_size()-1,c=='1'); 01070 } 01071 else{ 01072 break; 01073 } 01074 }while(true); 01075 return in; 01076 } 01077 */ 01078 01079 // ==== int_vector implemenation ==== 01080 01081 template<uint8_t fixedIntWidth, class size_type_class> 01082 inline int_vector<fixedIntWidth,size_type_class>::int_vector(size_type elements, value_type default_value, uint8_t intWidth):m_size(0), m_data(NULL), m_int_width(intWidth) 01083 { 01084 int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, intWidth); 01085 resize(elements); 01086 if (default_value == 0) { 01087 util::set_zero_bits(*this); 01088 } else if (default_value == 1 and m_int_width == 1) { 01089 util::set_one_bits(*this); 01090 } else { 01091 util::set_all_values_to_k(*this, default_value); // new initialization 01092 } 01093 } 01094 01095 template<uint8_t fixedIntWidth, class size_type_class> 01096 inline int_vector<fixedIntWidth,size_type_class>::int_vector(const int_vector& v):m_size(0), m_data(NULL), m_int_width(v.m_int_width) 01097 { 01098 bit_resize(v.bit_size()); 01099 if (v.capacity() > 0) { 01100 if (memcpy(m_data, v.data() ,v.capacity()/8)==NULL) { 01101 throw std::bad_alloc(); 01102 } 01103 } 01104 int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, v.m_int_width); 01105 // set_int_width(v.get_int_width()); 01106 } 01107 01108 template<uint8_t fixedIntWidth, class size_type_class> 01109 int_vector<fixedIntWidth,size_type_class>& int_vector<fixedIntWidth,size_type_class>::operator=(const int_vector& v) 01110 { 01111 if (this != &v) {// if v is not the same object 01112 bit_resize(v.bit_size()); 01113 if (v.bit_size()>0) { 01114 if (memcpy(m_data, v.data() ,v.capacity()/8)==NULL) { 01115 throw std::bad_alloc(); 01116 } 01117 } 01118 set_int_width(v.get_int_width()); 01119 } 01120 return *this; 01121 } 01122 01123 // Destructor 01124 template<uint8_t fixedIntWidth, class size_type_class> 01125 int_vector<fixedIntWidth,size_type_class>::~int_vector() 01126 { 01127 if (m_data != NULL) { 01128 free(m_data); //fixed delete 01129 } 01130 } 01131 01132 template<uint8_t fixedIntWidth, class size_type_class> 01133 void int_vector<fixedIntWidth,size_type_class>::swap(int_vector& v) 01134 { 01135 if (this != &v) { // if v and _this_ are not the same object 01136 size_type size = m_size; 01137 uint64_t* data = m_data; 01138 uint8_t intWidth = m_int_width; 01139 m_size = v.m_size; 01140 m_data = v.m_data; 01141 int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, v.m_int_width); 01142 v.m_size = size; 01143 v.m_data = data; 01144 int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(v.m_int_width, intWidth); 01145 } 01146 } 01147 01148 template<uint8_t fixedIntWidth, class size_type_class> 01149 void int_vector<fixedIntWidth,size_type_class>::resize(const size_type size) 01150 { 01151 bit_resize(size * get_int_width()); 01152 } 01153 01154 01155 template<uint8_t fixedIntWidth, class size_type_class> 01156 void int_vector<fixedIntWidth,size_type_class>::bit_resize(const size_type size) 01157 { 01158 bool do_realloc = ((size+63)>>6) != ((m_size+63)>>6); 01159 const size_type old_size = m_size; 01160 m_size = size; // set new size 01161 // special case: bitvector of size 0 01162 if (do_realloc or m_data==NULL) { // or (fixedIntWidth==1 and m_size==0) ) { 01163 uint64_t* data = NULL; 01164 // Note that we allocate 8 additional bytes if m_size % 64 == 0. 01165 // We need this padding since rank data structures do a memory 01166 // access to this padding to answer rank(size()) if size()%64 ==0. 01167 // Note that this padding is not counted in the serialize method! 01168 data = (uint64_t*)realloc(m_data, (((m_size+64)>>6)<<3)); // if m_data == NULL realloc 01169 // Method realloc is equivalent to malloc if m_data == NULL. 01170 // If size is zero and ptr is not NULL, a new, minimum sized object is allocated and the original object is freed. 01171 // The allocated memory is aligned such that it can be used for any data type, including AltiVec- and SSE-related types. 01172 m_data = data; 01173 // initialize unreachable bits to 0 01174 if (m_size > old_size and bit_size() < capacity()) {//m_size>0 01175 bit_magic::write_int(m_data+(bit_size()>>6), 0, bit_size()&0x3F, capacity()-bit_size()); 01176 } 01177 if ((m_size % 64) == 0) { // initialize unreachable bits with 0 01178 m_data[m_size/64] = 0; 01179 } 01180 } 01181 } 01182 01183 template<uint8_t fixedIntWidth, class size_type_class> 01184 inline void int_vector<fixedIntWidth,size_type_class>::setBit(size_type idx,const bool value) 01185 { 01186 #ifdef SDSL_DEBUG 01187 if (idx >= m_size) { 01188 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::setBit(size_type idx, const bool value); idx >= size()!"); 01189 } 01190 #endif 01191 size_type int64_idx = idx >> 6; // idx/64 01192 idx &= 0x3F; // idx inside a uint64_t 01193 if (value) { 01194 m_data[int64_idx] |= 1ULL << idx; 01195 } else { 01196 m_data[int64_idx] &= (bit_magic::All1Mask ^ (1ULL << idx)); 01197 } 01198 } 01199 01200 template<uint8_t fixedIntWidth, class size_type_class> 01201 inline const bool int_vector<fixedIntWidth,size_type_class>::getBit(size_type idx)const 01202 { 01203 #ifdef SDSL_DEBUG 01204 if (idx >= m_size) { 01205 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::getBit(size_type idx); idx >= size()!"); 01206 } 01207 #endif 01208 return m_data[idx >> 6] & (1ULL << (idx & 0x3F)); 01209 } 01210 01211 template<uint8_t fixedIntWidth, class size_type_class> 01212 inline const typename int_vector<fixedIntWidth,size_type_class>::value_type int_vector<fixedIntWidth,size_type_class>::get_int(size_type idx, const uint8_t len)const 01213 { 01214 #ifdef SDSL_DEBUG 01215 if (idx+len > m_size) { 01216 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); idx+len > size()!"); 01217 } 01218 if (len > 64) { 01219 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); len>64!"); 01220 } 01221 #endif 01222 return bit_magic::read_int(m_data+(idx>>6), idx&0x3F, len); 01223 } 01224 01225 template<uint8_t fixedIntWidth, class size_type_class> 01226 inline void int_vector<fixedIntWidth,size_type_class>::set_int(size_type idx, value_type x, const uint8_t len) 01227 { 01228 #ifdef SDSL_DEBUG 01229 if (idx+len > m_size) { 01230 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); idx+len > size()!"); 01231 } 01232 if (len > 64) { 01233 throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); len>64!"); 01234 } 01235 #endif 01236 bit_magic::write_int(m_data+(idx>>6), x, idx&0x3F, len); 01237 } 01238 01239 template<uint8_t fixedIntWidth, class size_type_class> 01240 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::size() const 01241 { 01242 return m_size/m_int_width; 01243 } 01244 01245 template<uint8_t fixedIntWidth, class size_type_class> 01246 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::max_size() 01247 { 01248 return ((size_type)1)<<(sizeof(size_type)*8-6);// TODO: motivation for this expression 01249 } 01250 01251 template<uint8_t fixedIntWidth, class size_type_class> 01252 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::bit_size() const 01253 { 01254 return m_size; 01255 } 01256 01257 template<uint8_t fixedIntWidth, class size_type_class> 01258 bool int_vector<fixedIntWidth,size_type_class>::empty() const 01259 { 01260 return m_size==0; 01261 } 01262 01263 template<uint8_t fixedIntWidth, class size_type_class> 01264 inline typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::capacity() const 01265 { 01266 return ((m_size+63)>>6)<<6; 01267 } 01268 01269 template<uint8_t fixedIntWidth, class size_type_class> 01270 const uint8_t int_vector<fixedIntWidth,size_type_class>::get_int_width()const 01271 { 01272 return m_int_width; 01273 } 01274 01275 template<uint8_t fixedIntWidth, class size_type_class> 01276 void int_vector<fixedIntWidth,size_type_class>::set_int_width(uint8_t width) 01277 { 01278 int_vector_trait<fixedIntWidth,size_type_class>::set_int_width(m_int_width, width); // delegate to trait function 01279 } 01280 01281 template<uint8_t fixedIntWidth, class size_type_class> 01282 inline typename int_vector<fixedIntWidth,size_type_class>::reference int_vector<fixedIntWidth,size_type_class>::operator[](const size_type& idx) 01283 { 01284 size_type i = idx * m_int_width; 01285 return reference(this->m_data + (i>>6), i&0x3F, m_int_width); 01286 } 01287 01288 // specialized [] operator for 64 bit access. 01289 template<> 01290 inline int_vector<64>::reference int_vector<64>::operator[](const size_type& idx) 01291 { 01292 return *(this->m_data+idx); 01293 } 01294 01295 // specialized [] operator for 32 bit access. 01296 template<> 01297 inline int_vector<32>::reference int_vector<32>::operator[](const size_type& idx) 01298 { 01299 return *(((uint32_t*)(this->m_data))+idx); 01300 } 01301 01302 // specialized [] operator for 16 bit access. 01303 template<> 01304 inline int_vector<16>::reference int_vector<16>::operator[](const size_type& idx) 01305 { 01306 return *(((uint16_t*)(this->m_data))+idx); 01307 } 01308 01309 // specialized [] operator for 8 bit access. 01310 template<> 01311 inline int_vector<8>::reference int_vector<8>::operator[](const size_type& idx) 01312 { 01313 return *(((uint8_t*)(this->m_data))+idx); 01314 } 01315 01316 template<uint8_t fixedIntWidth, class size_type_class> 01317 inline typename int_vector<fixedIntWidth,size_type_class>::const_reference int_vector<fixedIntWidth,size_type_class>::operator[](const size_type& idx)const 01318 { 01319 return get_int(idx * fixedIntWidth, fixedIntWidth); 01320 } 01321 01322 template<> 01323 inline int_vector<0>::const_reference int_vector<0>::operator[](const size_type& idx)const 01324 { 01325 return get_int(idx * m_int_width, m_int_width); 01326 } 01327 01328 template<> 01329 inline int_vector<64>::const_reference int_vector<64>::operator[](const size_type& idx)const 01330 { 01331 return *(this->m_data+idx); 01332 } 01333 01334 template<> 01335 inline int_vector<32>::const_reference int_vector<32>::operator[](const size_type& idx)const 01336 { 01337 return *(((uint32_t*)this->m_data)+idx); 01338 } 01339 01340 template<> 01341 inline int_vector<16>::const_reference int_vector<16>::operator[](const size_type& idx)const 01342 { 01343 return *(((uint16_t*)this->m_data)+idx); 01344 } 01345 01346 template<> 01347 inline int_vector<8>::const_reference int_vector<8>::operator[](const size_type& idx)const 01348 { 01349 return *(((uint8_t*)this->m_data)+idx); 01350 } 01351 01352 template<> 01353 inline int_vector<1>::const_reference int_vector<1>::operator[](const size_type& idx)const 01354 { 01355 return ((*(m_data+(idx>>6)))>>(idx&0x3F))&1; 01356 } 01357 01358 template<uint8_t fixedIntWidth, class size_type_class> 01359 inline const typename int_vector<fixedIntWidth,size_type_class>::iterator int_vector<fixedIntWidth,size_type_class>::begin() 01360 { 01361 // return iterator(this, 0); 01362 return int_vector_trait<fixedIntWidth,size_type_class>::begin(this, m_data); 01363 } 01364 01365 template<uint8_t fixedIntWidth, class size_type_class> 01366 inline const typename int_vector<fixedIntWidth,size_type_class>::iterator int_vector<fixedIntWidth,size_type_class>::end() 01367 { 01368 // return iterator(this, m_int_width*(m_size/m_int_width)); 01369 return int_vector_trait<fixedIntWidth,size_type_class>::end(this, m_data, (m_size/m_int_width)); 01370 } 01371 01372 template<uint8_t fixedIntWidth, class size_type_class> 01373 inline const typename int_vector<fixedIntWidth,size_type_class>::const_iterator int_vector<fixedIntWidth,size_type_class>::begin()const 01374 { 01375 // return const_iterator(this, 0); 01376 return int_vector_trait<fixedIntWidth,size_type_class>::begin(this, m_data); 01377 } 01378 01379 template<uint8_t fixedIntWidth, class size_type_class> 01380 inline const typename int_vector<fixedIntWidth,size_type_class>::const_iterator int_vector<fixedIntWidth,size_type_class>::end()const 01381 { 01382 // return const_iterator(this, m_int_width*(m_size/m_int_width)); 01383 return int_vector_trait<fixedIntWidth,size_type_class>::end(this, m_data, (m_size/m_int_width)); 01384 } 01385 01386 template<uint8_t fixedIntWidth, class size_type_class> 01387 bool int_vector<fixedIntWidth,size_type_class>::operator==(const int_vector<fixedIntWidth>& v)const 01388 { 01389 if (capacity() != v.capacity()) 01390 return false; 01391 if (bit_size() != v.bit_size()) 01392 return false; 01393 if (empty()) 01394 return true; 01395 const uint64_t* data1 = v.data(); 01396 const uint64_t* data2 = data(); 01397 for (size_type i=0; i < (capacity()>>6)-1; ++i) { 01398 if (*(data1++) != *(data2++)) 01399 return false; 01400 } 01401 int8_t l = 64-(capacity()-bit_size()); 01402 return ((*data1)&bit_magic::Li1Mask[l])==((*data2)&bit_magic::Li1Mask[l]); 01403 } 01404 01405 template<uint8_t fixedIntWidth, class size_type_class> 01406 bool int_vector<fixedIntWidth,size_type_class>::operator<(const int_vector& v)const 01407 { 01408 size_type min_size = size(); 01409 if (min_size > v.size()) 01410 min_size = v.size(); 01411 for (typename int_vector<fixedIntWidth,size_type_class>::const_iterator it = begin(), end = begin()+min_size, it_v = v.begin(); it!=end; ++it, ++it_v) { 01412 if (*it == *it_v) 01413 continue; 01414 else 01415 return *it < *it_v; 01416 } 01417 return size() < v.size(); 01418 } 01419 01420 template<uint8_t fixedIntWidth, class size_type_class> 01421 bool int_vector<fixedIntWidth,size_type_class>::operator>(const int_vector& v)const 01422 { 01423 size_type min_size = size(); 01424 if (min_size > v.size()) 01425 min_size = v.size(); 01426 for (typename int_vector<fixedIntWidth,size_type_class>::const_iterator it = begin(), end = begin()+min_size, it_v = v.begin(); it!=end; ++it, ++it_v) { 01427 if (*it == *it_v) 01428 continue; 01429 else 01430 return *it > *it_v; 01431 } 01432 return size() > v.size(); 01433 } 01434 01435 template<uint8_t fixedIntWidth, class size_type_class> 01436 bool int_vector<fixedIntWidth,size_type_class>::operator<=(const int_vector& v)const 01437 { 01438 return *this==v or *this<v; 01439 } 01440 01441 template<uint8_t fixedIntWidth, class size_type_class> 01442 bool int_vector<fixedIntWidth,size_type_class>::operator>=(const int_vector& v)const 01443 { 01444 return *this==v or *this>v; 01445 } 01446 01447 template<uint8_t fixedIntWidth, class size_type_class> 01448 bool int_vector<fixedIntWidth,size_type_class>::operator!=(const int_vector& v)const 01449 { 01450 return !(*this==v); 01451 } 01452 01453 template<class size_type_class> 01454 size_type_class _sdsl_serialize_size_and_int_width(std::ostream& out, uint8_t fixed_int_width, uint8_t int_width, size_type_class size) 01455 { 01456 size_type_class written_bytes = 0; 01457 out.write((char*) &size, sizeof(size)); 01458 written_bytes += sizeof(size); 01459 if (fixed_int_width == 0) { 01460 out.write((char*) &int_width, sizeof(int_width)); 01461 written_bytes += sizeof(int_width); 01462 } 01463 return written_bytes; 01464 } 01465 01466 template<uint8_t fixedIntWidth, class size_type_class> 01467 typename int_vector<fixedIntWidth,size_type_class>::size_type int_vector<fixedIntWidth,size_type_class>::serialize(std::ostream& out, 01468 structure_tree_node* v, 01469 string name, 01470 bool write_fixed_as_variable) const 01471 { 01472 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 01473 size_type written_bytes = 0; 01474 if (fixedIntWidth > 0 and write_fixed_as_variable) { 01475 written_bytes += _sdsl_serialize_size_and_int_width(out, 0, fixedIntWidth, m_size); 01476 } else { 01477 written_bytes += _sdsl_serialize_size_and_int_width(out, fixedIntWidth, m_int_width, m_size); 01478 } 01479 uint64_t* p = m_data; 01480 const static size_type SDSL_BLOCK_SIZE = (1<<28); 01481 size_type idx = 0; 01482 while (idx+SDSL_BLOCK_SIZE < (capacity()>>6)) { 01483 out.write((char*) p, SDSL_BLOCK_SIZE*sizeof(uint64_t)); 01484 written_bytes += SDSL_BLOCK_SIZE*sizeof(uint64_t); 01485 p += SDSL_BLOCK_SIZE; 01486 idx += SDSL_BLOCK_SIZE; 01487 } 01488 out.write((char*) p, ((capacity()>>6)-idx)*sizeof(uint64_t)); 01489 written_bytes += ((capacity()>>6)-idx)*sizeof(uint64_t); 01490 structure_tree::add_size(child, written_bytes); 01491 return written_bytes; 01492 } 01493 01494 template<uint8_t fixedIntWidth, class size_type_class> 01495 void int_vector<fixedIntWidth,size_type_class>::load(std::istream& in) 01496 { 01497 size_type size; 01498 int_vector_trait<fixedIntWidth, size_type_class>::read_header(size, m_int_width, in); 01499 01500 // util::read_member(size, in); 01501 // if( fixedIntWidth == 0 ){ 01502 // in.read((char *) &intWidth, sizeof(m_int_width)); 01503 // } 01504 // else 01505 // intWidth = fixedIntWidth; 01506 // set_int_width(intWidth); 01507 01508 bit_resize(size); 01509 uint64_t* p = m_data; 01510 const static size_type SDSL_BLOCK_SIZE = (1<<28); // TODO: is this the loading bottleneck? 01511 size_type idx = 0; 01512 while (idx+SDSL_BLOCK_SIZE < (capacity()>>6)) { 01513 in.read((char*) p, SDSL_BLOCK_SIZE*sizeof(uint64_t)); 01514 p += SDSL_BLOCK_SIZE; 01515 idx += SDSL_BLOCK_SIZE; 01516 } 01517 in.read((char*) p, ((capacity()>>6)-idx)*sizeof(uint64_t)); 01518 } 01519 01521 template<class size_type_class> 01522 class char_array_serialize_wrapper 01523 { 01524 public: 01525 typedef size_type_class size_type; 01526 private: 01527 01528 size_type m_n; // number of char 01529 const unsigned char* m_cp; 01530 01531 public: 01532 01533 char_array_serialize_wrapper(const unsigned char* c=NULL, size_type n=0):m_n(n), m_cp(c) {} 01534 01535 // char_array_serialize_wrapper(const char* c=NULL, size_type n=0):m_n(n), m_cp(c){} 01536 01537 size_type serialize(std::ostream& out) const { 01538 size_type size = m_n*8; // number of bits 01539 size_type written_bytes = 0; 01540 written_bytes += _sdsl_serialize_size_and_int_width(out, 8, 8, size); 01541 const char* p = (const char*)m_cp; 01542 const static size_type SDSL_BLOCK_SIZE = (1<<22); 01543 size_type idx = 0; 01544 while (idx+SDSL_BLOCK_SIZE < m_n) { 01545 out.write(p, SDSL_BLOCK_SIZE); 01546 written_bytes += SDSL_BLOCK_SIZE; 01547 p += SDSL_BLOCK_SIZE; 01548 idx += SDSL_BLOCK_SIZE; 01549 } 01550 // now: m_n-idx <= SDSL_BLOCK_SIZE 01551 out.write(p , m_n-idx); 01552 written_bytes += m_n-idx; 01553 uint32_t r= (sizeof(uint64_t)-(m_n % sizeof(uint64_t))) % sizeof(uint64_t); 01554 out.write("\0\0\0\0\0\0\0\0", r); 01555 written_bytes += r; 01556 return written_bytes; 01557 } 01558 01559 }; 01560 01562 template<uint8_t fixedIntWidth, class size_type_class> 01563 class int_vector_file_buffer 01564 { 01565 public: 01566 typedef typename int_vector<fixedIntWidth, size_type_class>::size_type size_type; 01567 typedef typename int_vector<fixedIntWidth, size_type_class>::value_type value_type; 01568 typedef typename int_vector<fixedIntWidth, size_type_class>::const_reference const_reference; 01569 typedef typename int_vector<fixedIntWidth, size_type_class>::reference reference; 01570 typedef typename int_vector<fixedIntWidth, size_type_class>::int_width_type int_width_type; 01571 01572 private: 01573 01574 std::fstream m_in; 01575 uint64_t* m_buf; 01576 size_type m_off; // offset in the first 64bit word of the buffer 01577 size_type m_read_values; // number of values read in the last buffer operation 01578 size_type m_len; 01579 size_type m_int_vector_size; 01580 size_type m_read_values_sum; 01581 int_width_type m_int_width; 01582 std::string m_file_name; 01583 bool m_load_from_plain; 01584 01585 void load_size_and_width() { 01586 int_vector_trait<fixedIntWidth, size_type_class>::read_header(m_int_vector_size, m_int_width, m_in); 01587 m_int_vector_size/=m_int_width; 01588 } 01589 01590 size_type words_to_read(size_type len) { 01591 if (len == 0) 01592 return 0; 01593 size_type last_off = (m_read_values_sum*m_int_width)%64; 01594 assert(((64-last_off)%64) < m_int_width); 01595 return ((len*m_int_width - ((64-last_off)%64))+63)/64; 01596 } 01597 01598 void init() { 01599 m_int_vector_size = 0; 01600 m_off = 0; 01601 m_read_values = 0; 01602 m_read_values_sum = 0; 01603 } 01604 01605 public: 01606 01607 const size_type& int_vector_size; 01608 const uint8_t& int_width; 01609 const std::string& file_name; 01610 01612 /* 01613 * \param f_file_name File which contains the int_vector. 01614 * \param len Length of the buffer in elements. 01615 */ 01616 int_vector_file_buffer(const char* f_file_name=NULL, size_type len=1000000, uint8_t int_width=0):m_in(), m_buf(NULL), m_off(0), m_read_values(0), m_len(0), m_int_vector_size(0), m_read_values_sum(0), m_int_width(fixedIntWidth), m_file_name(), m_load_from_plain(false), int_vector_size(m_int_vector_size), int_width(m_int_width), file_name(m_file_name) { 01617 m_load_from_plain = false; 01618 int_vector_trait<fixedIntWidth, size_type_class>::set_int_width(m_int_width, int_width); 01619 m_len = len; 01620 init(); 01621 if (f_file_name == NULL) { 01622 return; 01623 } 01624 m_file_name = f_file_name; 01625 m_in.open(m_file_name.c_str(), std::ifstream::in); 01626 if (m_in.is_open()) { 01627 load_size_and_width(); 01628 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2]; 01629 } else { 01630 m_buf = NULL; 01631 } 01632 } 01633 01634 // initialize int_vector_file_buffer from a plain file 01635 // works only for fixedIntWidth = 8 // TODO extent to 1,16,32,64 01636 bool load_from_plain(const char* f_file_name, size_type len=1000000, uint8_t int_width=0) { 01637 if (f_file_name == NULL) { 01638 std::logic_error("ERROR: int_vector_file_buffer::load_from_plain expects a file name."); 01639 return false; 01640 } 01641 if (fixedIntWidth != 8) { 01642 std::logic_error("ERROR: int_vector_file_buffer: load_from_plain is only implemented for fixedIntWidth=8."); 01643 return false; 01644 } 01645 m_load_from_plain = true; 01646 int_vector_trait<fixedIntWidth, size_type_class>::set_int_width(m_int_width, int_width); 01647 m_len = len; 01648 m_file_name = f_file_name; 01649 m_int_vector_size = get_file_size(m_file_name.c_str()); 01650 m_in.open(m_file_name.c_str(), std::ifstream::in); 01651 if (m_in.is_open()) { 01652 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2]; 01653 } else { 01654 m_buf = NULL; 01655 return false; 01656 } 01657 return true; 01658 } 01659 01660 bool reset(size_type new_buf_len=0) { 01661 m_in.clear(); 01662 if (!m_in.seekg(0, std::ios::beg)) { 01663 throw std::ios_base::failure("int_vector_file_buffer: reset()"); 01664 return false; 01665 }; 01666 init(); 01667 if (!m_load_from_plain) { 01668 load_size_and_width(); 01669 } else { 01670 m_int_vector_size = get_file_size(m_file_name.c_str()); 01671 } 01672 if (new_buf_len > 0 and new_buf_len != m_len) { 01673 if (m_buf != NULL) 01674 delete [] m_buf; 01675 m_len = new_buf_len; 01676 m_buf = new uint64_t[(m_len*m_int_width+63)/64 + 2]; 01677 } 01678 return true; 01679 } 01680 01681 bool valid() { 01682 return m_buf!=NULL; 01683 } 01684 01685 size_type load_next_block() { 01686 if (m_read_values_sum == m_int_vector_size) { 01687 return 0; 01688 } 01689 assert(m_read_values_sum < m_int_vector_size); 01690 size_type values_to_read = m_len; 01691 if (values_to_read + m_read_values_sum > m_int_vector_size) { 01692 values_to_read = m_int_vector_size - m_read_values_sum; 01693 } 01694 if (((m_read_values_sum*m_int_width)%64) == 0) { //if the new offset == 0 01695 m_in.read((char*) m_buf, words_to_read(values_to_read) * sizeof(uint64_t)); 01696 m_off = 0; 01697 } else { // the new offset != 0 01698 m_buf[0] = m_buf[(m_read_values*m_int_width+m_off)/64 ]; 01699 m_in.read((char*)(m_buf+1), words_to_read(values_to_read) * sizeof(uint64_t)); 01700 m_off = (m_read_values_sum*m_int_width)%64; 01701 } 01702 m_read_values_sum += values_to_read; 01703 m_read_values = values_to_read; 01704 return m_read_values; 01705 } 01706 01707 /* 01708 void write_current_block(){ 01709 if( m_read_values > 0 ){ // if we there are some values in the buffer 01710 01711 } 01712 } 01713 */ 01714 01716 /* \pre \f$ i < len \f$ 01717 */ 01718 value_type operator[](const size_type i)const { 01719 assert(i<m_len); 01720 size_type idx = i*m_int_width+m_off; 01721 return bit_magic::read_int(m_buf + (idx>>6), idx&0x3F, m_int_width); 01722 } 01723 01724 void set_int(const size_type i, uint64_t x) { 01725 size_type idx = i*m_int_width+m_off; 01726 bit_magic::write_int(m_buf + (idx>>6), x, idx&0x3F, m_int_width); 01727 } 01728 01729 const uint64_t* data()const { 01730 return m_buf; 01731 } 01732 //TODO: proxy object for writting the int buffer 01733 /* 01734 reference operator[](const size_type i){ 01735 assert(i<m_len); 01736 size_type idx = i*m_int_width+m_off; 01737 return reference(m_buf + (idx>>6), idx&0x3F, m_int_width); 01738 } 01739 */ 01740 01741 ~int_vector_file_buffer() { 01742 if (m_in.is_open()) { 01743 m_in.close(); // close ifstream 01744 delete [] m_buf; 01745 } 01746 } 01747 }; 01748 01749 /* 01750 // specialized [] operator for 8 bit access. 01751 template<> 01752 inline int_vector_file_buffer<8, uint64_t>::reference int_vector_file_buffer<8, uint64_t>::operator[](const size_type idx){ 01753 assert(idx<m_len); 01754 return *(((uint8_t*)(m_buf))+idx); 01755 } 01756 01757 01758 // specialized [] operator for 8 bit access. 01759 template<> 01760 inline int_vector_file_buffer<8, uint32_t>::reference int_vector_file_buffer<8, uint32_t>::operator[](const size_type idx){ 01761 assert(idx<m_len); 01762 return *(((uint8_t*)(m_buf))+idx); 01763 } 01764 */ 01765 01766 }// end namespace sdsl 01767 01768 #endif // end file