SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2010 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00017 /* \file lcp_vlc.hpp 00018 \brief lcp_vlc.hpp contains an implementation of a (compressed) lcp array. 00019 \author Simon Gog 00020 */ 00021 #ifndef INCLUDED_SDSL_LCP_VLC 00022 #define INCLUDED_SDSL_LCP_VLC 00023 00024 #include "lcp.hpp" 00025 #include "vlc_vector.hpp" 00026 #include "int_vector.hpp" 00027 #include "algorithms.hpp" 00028 #include "iterators.hpp" 00029 #include "bitmagic.hpp" 00030 #include <iostream> 00031 #include <algorithm> // for lower_bound 00032 #include <cassert> 00033 #include <cstring> // for strlen 00034 #include <iomanip> 00035 #include <iterator> 00036 #include <vector> 00037 #include <utility> // for pair 00038 00039 namespace sdsl 00040 { 00041 00042 // A class for the compressed version of lcp information of an suffix array 00043 /* We use variable length code for the lcp entries. 00044 * \par Time complexity 00045 * - \f$\Order{d}\f$, where d is the sample density 00046 */ 00047 template<class vlc_vec_type = vlc_vector<> > 00048 class lcp_vlc 00049 { 00050 public: 00051 typedef typename vlc_vec_type::value_type value_type; // STL Container requirement 00052 typedef random_access_const_iterator<lcp_vlc> const_iterator;// STL Container requirement 00053 typedef const_iterator iterator; // STL Container requirement 00054 typedef const value_type const_reference; 00055 typedef const_reference reference; 00056 typedef const_reference* pointer; 00057 typedef const pointer const_pointer; 00058 typedef typename vlc_vec_type::size_type size_type; // STL Container requirement 00059 typedef typename vlc_vec_type::difference_type difference_type; // STL Container requirement 00060 00061 typedef lcp_plain_tag lcp_category; 00062 00063 enum { fast_access = 0, 00064 text_order = 0, 00065 sa_order = 1 00066 }; 00067 00068 private: 00069 00070 vlc_vec_type m_vec; 00071 00072 void copy(const lcp_vlc& lcp_c) { 00073 m_vec = lcp_c.m_vec; 00074 } 00075 00076 public: 00078 lcp_vlc() {} 00080 ~lcp_vlc() {} 00082 lcp_vlc(const lcp_vlc& lcp_c) { 00083 copy(lcp_c); 00084 } 00085 00087 template<class Text, class Sa> 00088 lcp_vlc(const Text& text, const Sa& sa); 00089 00091 template<uint8_t int_width, class size_type_class> 00092 lcp_vlc(int_vector_file_buffer<int_width, size_type_class>& lcp_buf); 00093 00094 template<class Text, class Sa> 00095 void construct(const Text& text, const Sa& sa); 00096 00097 template<uint8_t int_width, class size_type_class> 00098 void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf); 00099 00101 00104 size_type size()const { 00105 return m_vec.size(); 00106 } 00107 00109 00112 static size_type max_size() { 00113 return vlc_vec_type::max_size(); 00114 } 00115 00117 00120 bool empty()const { 00121 return m_vec.empty(); 00122 } 00123 00125 00133 void swap(lcp_vlc& lcp_c); 00134 00136 00139 const_iterator begin()const; 00140 00142 00145 const_iterator end()const; 00146 00148 00152 inline value_type operator[](size_type i)const; 00153 00155 00158 lcp_vlc& operator=(const lcp_vlc& lcp_c); 00159 00161 00166 bool operator==(const lcp_vlc& lcp_c)const; 00167 00169 00174 bool operator!=(const lcp_vlc& lcp_c)const; 00175 00177 00180 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const; 00181 00183 00185 void load(std::istream& in); 00186 }; 00187 00188 // == template functions == 00189 00190 template<class vlc_vec_type> 00191 template<class Text, class Sa> 00192 lcp_vlc<vlc_vec_type>::lcp_vlc(const Text& text, const Sa& sa) 00193 { 00194 construct(text, sa); 00195 } 00196 00197 template<class vlc_vec_type> 00198 template<class Text, class Sa> 00199 void lcp_vlc<vlc_vec_type>::construct(const Text& text, const Sa& sa) 00200 { 00201 if (sa.size() == 0) { 00202 return; 00203 } 00204 int_vector<> lcp; 00205 lcp.set_int_width(bit_magic::l1BP(sa.size()) + 1); 00206 lcp.resize(sa.size()); 00207 00208 // use Kasai algorithm to compute the lcp values 00209 typename Sa::size_type sa_1 = sa(0), l=0, max_l=0, max_big_idx=0; 00210 for (typename Sa::size_type i=0,j=0; i < sa.size(); ++i) { 00211 if (l) --l; 00212 if (sa_1) { 00213 j = sa[sa_1-1]; 00214 while (i+l < sa.size() and j+l < sa.size() and text[i+l]==text[j+l]) ++l; 00215 } else { 00216 l = 0; 00217 } 00218 lcp[ sa_1 ] = l; 00219 sa_1 = sa.psi[sa_1]; 00220 } 00221 m_vec = vlc_vec_type(lcp); 00222 } 00223 00224 00225 template<class vlc_vec_type> 00226 template<uint8_t int_width, class size_type_class> 00227 lcp_vlc<vlc_vec_type>::lcp_vlc(int_vector_file_buffer<int_width, size_type_class>& lcp_buf) 00228 { 00229 construct(lcp_buf); 00230 } 00231 00232 template<class vlc_vec_type> 00233 template<uint8_t int_width, class size_type_class> 00234 void lcp_vlc<vlc_vec_type>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf) 00235 { 00236 m_vec.init(lcp_buf); 00237 } 00238 00239 template<class vlc_vec_type> 00240 void lcp_vlc<vlc_vec_type>::swap(lcp_vlc& lcp_c) 00241 { 00242 m_vec.swap(lcp_c.m_vec); 00243 } 00244 00245 template<class vlc_vec_type> 00246 inline typename lcp_vlc<vlc_vec_type>::value_type lcp_vlc<vlc_vec_type>::operator[](size_type i)const 00247 { 00248 return m_vec[i]; 00249 } 00250 00251 00252 template<class vlc_vec_type> 00253 typename lcp_vlc<vlc_vec_type>::size_type lcp_vlc<vlc_vec_type>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const 00254 { 00255 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00256 size_type written_bytes = 0; 00257 written_bytes += m_vec.serialize(out, child, "vec"); 00258 structure_tree::add_size(child, written_bytes); 00259 return written_bytes; 00260 } 00261 00262 template<class vlc_vec_type> 00263 void lcp_vlc<vlc_vec_type>::load(std::istream& in) 00264 { 00265 m_vec.load(in); 00266 } 00267 00268 00269 template<class vlc_vec_type> 00270 lcp_vlc<vlc_vec_type>& lcp_vlc<vlc_vec_type>::operator=(const lcp_vlc& lcp_c) 00271 { 00272 if (this != &lcp_c) { 00273 copy(lcp_c); 00274 } 00275 return *this; 00276 } 00277 00278 00279 template<class vlc_vec_type> 00280 bool lcp_vlc<vlc_vec_type>::operator==(const lcp_vlc& lcp_c)const 00281 { 00282 if (this == &lcp_c) 00283 return true; 00284 return m_vec == lcp_c.m_vec; 00285 } 00286 00287 template<class vlc_vec_type> 00288 bool lcp_vlc<vlc_vec_type>::operator!=(const lcp_vlc& lcp_c)const 00289 { 00290 return !(*this == lcp_c); 00291 } 00292 00293 template<class vlc_vec_type> 00294 typename lcp_vlc<vlc_vec_type>::const_iterator lcp_vlc<vlc_vec_type>::begin()const 00295 { 00296 return const_iterator(this, 0); 00297 } 00298 00299 template<class vlc_vec_type> 00300 typename lcp_vlc<vlc_vec_type>::const_iterator lcp_vlc<vlc_vec_type>::end()const 00301 { 00302 return const_iterator(this, size()); 00303 } 00304 00305 00306 00307 } // end namespace sdsl 00308 00309 #endif