SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 /* Changes: 00022 - Removed unnecessary rank support 00023 */ 00024 #ifndef INCLUDED_SDSL_LCP_SUPPORT_SADA 00025 #define INCLUDED_SDSL_LCP_SUPPORT_SADA 00026 00027 #include "lcp.hpp" 00028 #include "int_vector.hpp" 00029 #include "algorithms.hpp" 00030 #include "iterators.hpp" 00031 #include "csa_sada.hpp" // for standard template initialization of lcp_support_sada 00032 #include "select_support.hpp" // for standard template initialization of lcp_support_sada 00033 #include <iostream> 00034 #include <algorithm> 00035 #include <cassert> 00036 #include <cstring> // for strlen 00037 #include <iomanip> 00038 #include <iterator> 00039 00040 namespace sdsl 00041 { 00042 00043 00045 00055 template<class Csa = csa_sada<>, class BitVector = bit_vector, class SelectSupport = typename BitVector::select_1_type> 00056 class _lcp_support_sada 00057 { 00058 public: 00059 typedef typename Csa::value_type value_type; // STL Container requirement 00060 typedef random_access_const_iterator<_lcp_support_sada> const_iterator;// STL Container requirement 00061 typedef const_iterator iterator; // STL Container requirement 00062 typedef const value_type const_reference; 00063 typedef const_reference reference; 00064 typedef const_reference* pointer; 00065 typedef const pointer const_pointer; 00066 typedef int_vector<>::size_type size_type; // STL Container requirement 00067 typedef ptrdiff_t difference_type; // STL Container requirement 00068 typedef BitVector bit_vector_type; 00069 typedef Csa csa_type; 00070 00071 00072 typedef lcp_permuted_tag lcp_category; 00073 00074 enum { fast_access = 0, 00075 text_order = 1, 00076 sa_order = 0 00077 }; 00078 00079 template<class Cst> // template inner class which is used in CSTs to parametrize lcp classes 00080 class type // with information about the CST. Thanks Stefan Arnold! (2011-03-02) 00081 { 00082 public: 00083 typedef _lcp_support_sada lcp_type; 00084 }; 00085 00086 private: 00087 // 00088 const Csa* m_csa; 00089 bit_vector_type m_data; 00090 SelectSupport m_select_support; 00091 00092 void copy(const _lcp_support_sada& lcp_c) { 00093 m_csa = lcp_c.m_csa; 00094 m_data = lcp_c.m_data; 00095 m_select_support = lcp_c.m_select_support; 00096 m_select_support.set_vector(&m_data); 00097 } 00098 public: 00099 const Csa*& csa; 00101 _lcp_support_sada(): csa(m_csa) {} 00103 ~_lcp_support_sada() {} 00105 _lcp_support_sada(const _lcp_support_sada& lcp_c):csa(m_csa) { 00106 copy(lcp_c); 00107 } 00108 00110 template<class Text, class Sa> 00111 _lcp_support_sada(const Text& text, const Sa& sa, const Csa* csa); 00112 00113 00115 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1> 00116 _lcp_support_sada(int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00117 int_vector_file_buffer<int_width_1, size_type_class_1>& isa_buf, 00118 const Csa* f_csa); 00119 00120 template<class Text, class Sa> 00121 void construct(const Text& text, const Sa& sa, const Csa* csa); 00122 00123 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1> 00124 void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00125 int_vector_file_buffer<int_width_1, size_type_class_1>& isa_buf, 00126 const Csa* f_csa); 00127 00128 00129 00130 00131 void set_csa(const Csa* f_csa) { 00132 if (f_csa==NULL) { 00133 std::cerr<<"_lcp_support_sada: Warnung: set m_csa to NULL"<<std::endl; 00134 } 00135 m_csa = f_csa; 00136 } 00137 00139 00142 size_type size()const { 00143 return m_csa->size(); 00144 } 00145 00147 00150 static size_type max_size() { 00151 return Csa::max_size(); 00152 } 00153 00155 00158 bool empty()const { 00159 return m_csa->empty(); 00160 } 00161 00163 00171 void swap(_lcp_support_sada& lcp_c); 00172 00174 00177 const_iterator begin()const; 00178 00180 00183 const_iterator end()const; 00184 00186 00190 inline value_type operator[](size_type i)const; 00191 00193 00196 _lcp_support_sada& operator=(const _lcp_support_sada& lcp_c); 00197 00199 00204 bool operator==(const _lcp_support_sada& lcp_c)const; 00205 00207 00212 bool operator!=(const _lcp_support_sada& lcp_c)const; 00213 00215 00218 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const; 00219 00221 00224 void load(std::istream& in, const Csa* csa); 00225 }; 00226 00227 // == template functions == 00228 00229 template<class Csa, class BitVector, class SelectSupport> 00230 template<class Text, class Sa> 00231 _lcp_support_sada<Csa, BitVector, SelectSupport>::_lcp_support_sada(const Text& text, const Sa& sa, const Csa* csa):csa(m_csa) 00232 { 00233 construct(text, sa, csa); 00234 } 00235 00236 template<class Csa, class BitVector, class SelectSupport> 00237 template<class Text, class Sa> 00238 void _lcp_support_sada<Csa, BitVector, SelectSupport>::construct(const Text& text, const Sa& sa, const Csa* csa) 00239 { 00240 set_csa(csa); 00241 #ifdef SDSL_DEBUG 00242 std::cerr<<"start building _lcp_support_sada"<<std::endl; 00243 #endif 00244 bit_vector data = bit_vector(2*sa.size(), 0); 00245 size_type data_cnt = 0; 00246 for (typename Csa::size_type i=0,j=0, sa_1 = sa(0), l=0, oldl=1; i < sa.size(); ++i) { 00247 if (l) --l; 00248 if (sa_1) { 00249 j = sa[sa_1-1]; 00250 while (i+l < sa.size() and j+l < sa.size() and text[i+l]==text[j+l]) ++l; 00251 } else { 00252 l = 0; 00253 } 00254 data_cnt += l-oldl+1; 00255 data[data_cnt]=1; 00256 ++data_cnt; 00257 sa_1 = sa.psi[sa_1]; 00258 oldl = l; 00259 } 00260 data.resize(data_cnt); 00261 util::assign(m_data, data); 00262 util::init_support(m_select_support, &m_data); 00263 #ifdef SDSL_DEBUG 00264 std::cerr<<"finished building _lcp_support_sada"<<std::endl; 00265 #endif 00266 } 00267 00268 template<class Csa, class BitVector, class SelectSupport> 00269 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1> 00270 _lcp_support_sada<Csa, BitVector, SelectSupport>::_lcp_support_sada(int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00271 int_vector_file_buffer<int_width_1, size_type_class_1>& isa_buf, 00272 const Csa* f_csa):csa(m_csa) 00273 { 00274 construct(lcp_buf, isa_buf, f_csa); 00275 } 00276 00277 template<class Csa, class BitVector, class SelectSupport> 00278 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1> 00279 void _lcp_support_sada<Csa, BitVector, SelectSupport>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00280 int_vector_file_buffer<int_width_1, size_type_class_1>& isa_buf, 00281 const Csa* f_csa) 00282 { 00283 typedef typename Csa::size_type size_type; 00284 set_csa(f_csa); 00285 #ifdef SDSL_DEBUG 00286 std::cerr<<"start building _lcp_support_sada"<<std::endl; 00287 #endif 00288 int_vector<int_width, size_type_class> lcp; 00289 util::load_from_file(lcp, lcp_buf.file_name.c_str()); 00290 isa_buf.reset(); 00291 size_type n = lcp.size(); 00292 bit_vector data = bit_vector(2*n, 0); 00293 size_type data_cnt=0; 00294 for (size_type i=0, r_sum=0, r = isa_buf.load_next_block(), l=0, old_l=1; r_sum < n;) { 00295 for (; i < r_sum+r; ++i) { 00296 l = lcp[isa_buf[i-r_sum]]; 00297 data_cnt += l + 1 - old_l; 00298 data[data_cnt++] = 1; 00299 old_l = l; 00300 } 00301 r_sum += r; 00302 r = isa_buf.load_next_block(); 00303 } 00304 data.resize(data_cnt); 00305 util::assign(m_data, data); 00306 util::init_support(m_select_support, &m_data); 00307 #ifdef SDSL_DEBUG 00308 std::cerr<<"finished building _lcp_support_sada"<<std::endl; 00309 #endif 00310 } 00311 00312 template<class Csa, class BitVector, class SelectSupport> 00313 void _lcp_support_sada<Csa, BitVector, SelectSupport>::swap(_lcp_support_sada& lcp_c) 00314 { 00315 m_data.swap(lcp_c.m_data); 00316 util::swap_support(m_select_support, lcp_c.m_select_support, &m_data, &(lcp_c.m_data)); 00317 } 00318 00319 template<class Csa, class BitVector, class SelectSupport> 00320 inline typename _lcp_support_sada<Csa, BitVector, SelectSupport>::value_type _lcp_support_sada<Csa, BitVector, SelectSupport>::operator[](size_type i)const 00321 { 00322 size_type j = (*m_csa)[i]; 00323 size_type s = m_select_support.select(j+1); 00324 return s-(j<<1); 00325 } 00326 00327 00328 template<class Csa, class BitVector, class SelectSupport> 00329 typename _lcp_support_sada<Csa, BitVector, SelectSupport>::size_type _lcp_support_sada<Csa, BitVector, SelectSupport>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const 00330 { 00331 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00332 size_type written_bytes = 0; 00333 written_bytes += m_data.serialize(out, child, "data"); 00334 written_bytes += m_select_support.serialize(out, child, "select_support"); 00335 structure_tree::add_size(child, written_bytes); 00336 return written_bytes; 00337 } 00338 00339 template<class Csa, class BitVector, class SelectSupport> 00340 void _lcp_support_sada<Csa, BitVector, SelectSupport>::load(std::istream& in, const Csa* csa) 00341 { 00342 m_csa = csa; 00343 m_data.load(in); 00344 m_select_support.load(in, &m_data); 00345 } 00346 00347 00348 template<class Csa, class BitVector, class SelectSupport> 00349 _lcp_support_sada<Csa, BitVector, SelectSupport>& _lcp_support_sada<Csa, BitVector, SelectSupport>::operator=(const _lcp_support_sada& lcp_c) 00350 { 00351 if (this != &lcp_c) { 00352 copy(lcp_c); 00353 } 00354 return *this; 00355 } 00356 00357 00358 template<class Csa, class BitVector, class SelectSupport> 00359 bool _lcp_support_sada<Csa, BitVector, SelectSupport>::operator==(const _lcp_support_sada& lcp_c)const 00360 { 00361 if (this == &lcp_c) 00362 return true; 00363 return m_csa == lcp_c.m_csa and m_data == lcp_c.m_data and m_select_support == lcp_c.m_select_support; 00364 } 00365 00366 template<class Csa, class BitVector, class SelectSupport> 00367 bool _lcp_support_sada<Csa, BitVector, SelectSupport>::operator!=(const _lcp_support_sada& lcp_c)const 00368 { 00369 return !(*this == lcp_c); 00370 } 00371 00372 template<class Csa, class BitVector, class SelectSupport> 00373 typename _lcp_support_sada<Csa, BitVector, SelectSupport>::const_iterator _lcp_support_sada<Csa, BitVector, SelectSupport>::begin()const 00374 { 00375 return const_iterator(this, 0); 00376 } 00377 00378 template<class Csa, class BitVector, class SelectSupport> 00379 typename _lcp_support_sada<Csa, BitVector, SelectSupport>::const_iterator _lcp_support_sada<Csa, BitVector, SelectSupport>::end()const 00380 { 00381 return const_iterator(this, size()); 00382 } 00383 00384 00386 template<class BitVector = bit_vector, class SelectSupport = typename BitVector::select_1_type> 00387 class lcp_support_sada 00388 { 00389 public: 00390 template<class Cst> // template inner class which is used in CSTs to parametrize lcp classes 00391 class type // with information about the CST. Thanks Stefan Arnold! (2011-03-02) 00392 { 00393 public: 00394 typedef _lcp_support_sada<typename Cst::csa_type, BitVector, SelectSupport> lcp_type; 00395 }; 00396 }; 00397 00398 } // end namespace sdsl 00399 00400 #endif