SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_LCP_BITCOMPRESSED 00022 #define INCLUDED_SDSL_LCP_BITCOMPRESSED 00023 00024 #include "lcp.hpp" 00025 #include "int_vector.hpp" 00026 #include "algorithms.hpp" 00027 #include "iterators.hpp" 00028 #include "bitmagic.hpp" 00029 #include <iostream> 00030 #include <algorithm> // for lower_bound 00031 #include <cassert> 00032 #include <cstring> // for strlen 00033 #include <iomanip> 00034 #include <iterator> 00035 #include <vector> 00036 #include <utility> // for pair 00037 00038 namespace sdsl 00039 { 00040 00042 template<uint8_t width=0> 00043 class lcp_bitcompressed 00044 { 00045 public: 00046 typedef typename int_vector<width>::value_type value_type; // STL Container requirement 00047 typedef typename int_vector<width>::size_type size_type; // STL Container requirement 00048 typedef random_access_const_iterator<lcp_bitcompressed> const_iterator;// STL Container requirement 00049 typedef const_iterator iterator; // STL Container requirement 00050 typedef const value_type const_reference; 00051 typedef const_reference reference; 00052 typedef const_reference* pointer; 00053 typedef const pointer const_pointer; 00054 typedef ptrdiff_t difference_type; // STL Container requirement 00055 00056 typedef lcp_plain_tag lcp_category; 00057 00058 enum { fast_access = 1, 00059 text_order = 0, 00060 sa_order = 1 00061 }; 00062 00063 template<class Cst> // template inner class which is used in CSTs to parametrize lcp classes 00064 class type // with information about the CST. Thanks Stefan Arnold! (2011-03-02) 00065 { 00066 public: 00067 typedef lcp_bitcompressed lcp_type; 00068 }; 00069 00070 private: 00071 int_vector<width> m_lcp; 00072 00073 void copy(const lcp_bitcompressed& lcp_c) { 00074 m_lcp = lcp_c.m_lcp; 00075 } 00076 public: 00078 lcp_bitcompressed() {} 00080 ~lcp_bitcompressed() {} 00082 lcp_bitcompressed(const lcp_bitcompressed& lcp_c) { 00083 copy(lcp_c); 00084 } 00085 00087 template<class Text, class Sa> 00088 lcp_bitcompressed(const Text& text, const Sa& sa); 00089 00091 template<uint8_t int_width, class size_type_class> 00092 lcp_bitcompressed(int_vector_file_buffer<int_width, size_type_class>& lcp_buf); 00093 00094 template<class Text, class Sa> 00095 void construct(const Text& text, const Sa& sa); 00096 00097 template<uint8_t int_width, class size_type_class> 00098 void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf); 00099 00101 00104 size_type size()const { 00105 return m_lcp.size(); 00106 } 00107 00109 00112 static size_type max_size() { 00113 return int_vector<width>::max_size(); 00114 } 00115 00117 00120 bool empty()const { 00121 return m_lcp.empty(); 00122 } 00123 00125 00133 void swap(lcp_bitcompressed& lcp_c); 00134 00136 00139 const_iterator begin()const; 00140 00142 00145 const_iterator end()const; 00146 00148 00152 inline value_type operator[](size_type i)const; 00153 00155 00158 lcp_bitcompressed& operator=(const lcp_bitcompressed& lcp_c); 00159 00161 00166 bool operator==(const lcp_bitcompressed& lcp_c)const; 00167 00169 00174 bool operator!=(const lcp_bitcompressed& lcp_c)const; 00175 00177 00180 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const; 00181 00183 00185 void load(std::istream& in); 00186 }; 00187 00188 // == template functions == 00189 00190 template<uint8_t width> 00191 template<class Text, class Sa> 00192 lcp_bitcompressed<width>::lcp_bitcompressed(const Text& text, const Sa& sa) 00193 { 00194 construct(text, sa); 00195 } 00196 00197 template<uint8_t width> 00198 template<class Text, class Sa> 00199 void lcp_bitcompressed<width>::construct(const Text& text, const Sa& sa) 00200 { 00201 if (sa.size() == 0) { 00202 return; 00203 } 00204 m_lcp = int_vector<width>(sa.size(), 0, width); 00205 00206 // use Kasai algorithm to compute the lcp values 00207 typename int_vector<width>::size_type sa_1 = sa(0), l=0; 00208 for (typename int_vector<width>::size_type i=0,j=0; i < sa.size(); ++i) { 00209 if (l) --l; 00210 if (sa_1) { 00211 j = sa[sa_1-1]; 00212 while (i+l < sa.size() and j+l < sa.size() and text[i+l]==text[j+l]) ++l; 00213 } else { 00214 l = 0; 00215 } 00216 m_lcp[ sa_1 ] = l; 00217 sa_1 = sa.psi[sa_1]; 00218 } 00219 } 00220 00221 template<uint8_t width> 00222 template<uint8_t int_width, class size_type_class> 00223 lcp_bitcompressed<width>::lcp_bitcompressed(int_vector_file_buffer<int_width, size_type_class>& lcp_buf) 00224 { 00225 construct(lcp_buf); 00226 } 00227 00228 template<uint8_t width> 00229 template<uint8_t int_width, class size_type_class> 00230 void lcp_bitcompressed<width>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf) 00231 { 00232 lcp_buf.reset(); 00233 m_lcp = int_vector<width>(lcp_buf.int_vector_size, 0, lcp_buf.int_width); 00234 for (size_type i=0, r_sum=0, r = lcp_buf.load_next_block(); r_sum < m_lcp.size();) { 00235 for (; i < r_sum+r; ++i) { 00236 m_lcp[i] = lcp_buf[i-r_sum]; 00237 } 00238 r_sum += r; 00239 r = lcp_buf.load_next_block(); 00240 } 00241 } 00242 00243 template<uint8_t width> 00244 void lcp_bitcompressed<width>::swap(lcp_bitcompressed& lcp_c) 00245 { 00246 m_lcp.swap(lcp_c.m_lcp); 00247 } 00248 00249 template<uint8_t width> 00250 inline typename lcp_bitcompressed<width>::value_type lcp_bitcompressed<width>::operator[](size_type i)const 00251 { 00252 return m_lcp[i]; 00253 } 00254 00255 00256 template<uint8_t width> 00257 typename lcp_bitcompressed<width>::size_type lcp_bitcompressed<width>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const 00258 { 00259 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00260 size_type written_bytes = 0; 00261 written_bytes += m_lcp.serialize(out, child, "lcp"); 00262 structure_tree::add_size(child, written_bytes); 00263 return written_bytes; 00264 } 00265 00266 template<uint8_t width> 00267 void lcp_bitcompressed<width>::load(std::istream& in) 00268 { 00269 m_lcp.load(in); 00270 } 00271 00272 00273 template<uint8_t width> 00274 lcp_bitcompressed<width>& lcp_bitcompressed<width>::operator=(const lcp_bitcompressed& lcp_c) 00275 { 00276 if (this != &lcp_c) { 00277 copy(lcp_c); 00278 } 00279 return *this; 00280 } 00281 00282 00283 template<uint8_t width> 00284 bool lcp_bitcompressed<width>::operator==(const lcp_bitcompressed& lcp_c)const 00285 { 00286 if (this == &lcp_c) 00287 return true; 00288 return m_lcp == lcp_c.m_lcp; 00289 } 00290 00291 template<uint8_t width> 00292 bool lcp_bitcompressed<width>::operator!=(const lcp_bitcompressed& lcp_c)const 00293 { 00294 return !(*this == lcp_c); 00295 } 00296 00297 template<uint8_t width> 00298 typename lcp_bitcompressed<width>::const_iterator lcp_bitcompressed<width>::begin()const 00299 { 00300 return const_iterator(this, 0); 00301 } 00302 00303 template<uint8_t width> 00304 typename lcp_bitcompressed<width>::const_iterator lcp_bitcompressed<width>::end()const 00305 { 00306 return const_iterator(this, size()); 00307 } 00308 00309 } // end namespace sdsl 00310 00311 #endif