SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/lcp_bitcompressed.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_LCP_BITCOMPRESSED
00022 #define INCLUDED_SDSL_LCP_BITCOMPRESSED
00023 
00024 #include "lcp.hpp"
00025 #include "int_vector.hpp"
00026 #include "algorithms.hpp"
00027 #include "iterators.hpp"
00028 #include "bitmagic.hpp"
00029 #include <iostream>
00030 #include <algorithm> // for lower_bound
00031 #include <cassert>
00032 #include <cstring> // for strlen
00033 #include <iomanip>
00034 #include <iterator>
00035 #include <vector>
00036 #include <utility> // for pair
00037 
00038 namespace sdsl
00039 {
00040 
00042 template<uint8_t width=0>
00043 class lcp_bitcompressed
00044 {
00045     public:
00046         typedef typename int_vector<width>::value_type                  value_type;     // STL Container requirement
00047         typedef typename int_vector<width>::size_type                   size_type;              // STL Container requirement
00048         typedef random_access_const_iterator<lcp_bitcompressed>  const_iterator;// STL Container requirement
00049         typedef const_iterator                                                              iterator;           // STL Container requirement
00050         typedef const value_type                                                                const_reference;
00051         typedef const_reference                                                                 reference;
00052         typedef const_reference*                                                                pointer;
00053         typedef const pointer                                                                   const_pointer;
00054         typedef ptrdiff_t                                                                               difference_type; // STL Container requirement
00055 
00056         typedef lcp_plain_tag                                                                   lcp_category;
00057 
00058         enum { fast_access = 1,
00059                text_order  = 0,
00060                sa_order   = 1
00061              };
00062 
00063         template<class Cst>  // template inner class which is used in CSTs to parametrize lcp classes
00064         class type           // with information about the CST. Thanks Stefan Arnold! (2011-03-02)
00065         {
00066             public:
00067                 typedef lcp_bitcompressed lcp_type;
00068         };
00069 
00070     private:
00071         int_vector<width>  m_lcp;
00072 
00073         void copy(const lcp_bitcompressed& lcp_c) {
00074             m_lcp       = lcp_c.m_lcp;
00075         }
00076     public:
00078         lcp_bitcompressed() {}
00080         ~lcp_bitcompressed() {}
00082         lcp_bitcompressed(const lcp_bitcompressed& lcp_c) {
00083             copy(lcp_c);
00084         }
00085 
00087         template<class Text, class Sa>
00088         lcp_bitcompressed(const Text& text, const Sa& sa);
00089 
00091         template<uint8_t int_width, class size_type_class>
00092         lcp_bitcompressed(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00093 
00094         template<class Text, class Sa>
00095         void construct(const Text& text, const Sa& sa);
00096 
00097         template<uint8_t int_width, class size_type_class>
00098         void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00099 
00101 
00104         size_type size()const {
00105             return m_lcp.size();
00106         }
00107 
00109 
00112         static size_type max_size() {
00113             return int_vector<width>::max_size();
00114         }
00115 
00117 
00120         bool empty()const {
00121             return m_lcp.empty();
00122         }
00123 
00125 
00133         void swap(lcp_bitcompressed& lcp_c);
00134 
00136 
00139         const_iterator begin()const;
00140 
00142 
00145         const_iterator end()const;
00146 
00148 
00152         inline value_type operator[](size_type i)const;
00153 
00155 
00158         lcp_bitcompressed& operator=(const lcp_bitcompressed& lcp_c);
00159 
00161 
00166         bool operator==(const lcp_bitcompressed& lcp_c)const;
00167 
00169 
00174         bool operator!=(const lcp_bitcompressed& lcp_c)const;
00175 
00177 
00180         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00181 
00183 
00185         void load(std::istream& in);
00186 };
00187 
00188 // == template functions ==
00189 
00190 template<uint8_t width>
00191 template<class Text, class Sa>
00192 lcp_bitcompressed<width>::lcp_bitcompressed(const Text& text, const Sa& sa)
00193 {
00194     construct(text, sa);
00195 }
00196 
00197 template<uint8_t width>
00198 template<class Text, class Sa>
00199 void lcp_bitcompressed<width>::construct(const Text& text, const Sa& sa)
00200 {
00201     if (sa.size() == 0) {
00202         return;
00203     }
00204     m_lcp = int_vector<width>(sa.size(), 0, width);
00205 
00206     // use Kasai algorithm to compute the lcp values
00207     typename int_vector<width>::size_type sa_1 = sa(0), l=0;
00208     for (typename int_vector<width>::size_type i=0,j=0; i < sa.size(); ++i) {
00209         if (l) --l;
00210         if (sa_1) {
00211             j = sa[sa_1-1];
00212             while (i+l < sa.size() and j+l < sa.size() and text[i+l]==text[j+l]) ++l;
00213         } else {
00214             l = 0;
00215         }
00216         m_lcp[ sa_1 ] = l;
00217         sa_1 = sa.psi[sa_1];
00218     }
00219 }
00220 
00221 template<uint8_t width>
00222 template<uint8_t int_width, class size_type_class>
00223 lcp_bitcompressed<width>::lcp_bitcompressed(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00224 {
00225     construct(lcp_buf);
00226 }
00227 
00228 template<uint8_t width>
00229 template<uint8_t int_width, class size_type_class>
00230 void lcp_bitcompressed<width>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00231 {
00232     lcp_buf.reset();
00233     m_lcp = int_vector<width>(lcp_buf.int_vector_size, 0, lcp_buf.int_width);
00234     for (size_type i=0, r_sum=0, r = lcp_buf.load_next_block(); r_sum < m_lcp.size();) {
00235         for (; i < r_sum+r; ++i) {
00236             m_lcp[i] = lcp_buf[i-r_sum];
00237         }
00238         r_sum   +=      r;
00239         r               =       lcp_buf.load_next_block();
00240     }
00241 }
00242 
00243 template<uint8_t width>
00244 void lcp_bitcompressed<width>::swap(lcp_bitcompressed& lcp_c)
00245 {
00246     m_lcp.swap(lcp_c.m_lcp);
00247 }
00248 
00249 template<uint8_t width>
00250 inline typename lcp_bitcompressed<width>::value_type lcp_bitcompressed<width>::operator[](size_type i)const
00251 {
00252     return m_lcp[i];
00253 }
00254 
00255 
00256 template<uint8_t width>
00257 typename lcp_bitcompressed<width>::size_type lcp_bitcompressed<width>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
00258 {
00259     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00260     size_type written_bytes = 0;
00261     written_bytes += m_lcp.serialize(out, child, "lcp");
00262     structure_tree::add_size(child, written_bytes);
00263     return written_bytes;
00264 }
00265 
00266 template<uint8_t width>
00267 void lcp_bitcompressed<width>::load(std::istream& in)
00268 {
00269     m_lcp.load(in);
00270 }
00271 
00272 
00273 template<uint8_t width>
00274 lcp_bitcompressed<width>& lcp_bitcompressed<width>::operator=(const lcp_bitcompressed& lcp_c)
00275 {
00276     if (this != &lcp_c) {
00277         copy(lcp_c);
00278     }
00279     return *this;
00280 }
00281 
00282 
00283 template<uint8_t width>
00284 bool lcp_bitcompressed<width>::operator==(const lcp_bitcompressed& lcp_c)const
00285 {
00286     if (this == &lcp_c)
00287         return true;
00288     return m_lcp == lcp_c.m_lcp;
00289 }
00290 
00291 template<uint8_t width>
00292 bool lcp_bitcompressed<width>::operator!=(const lcp_bitcompressed& lcp_c)const
00293 {
00294     return !(*this == lcp_c);
00295 }
00296 
00297 template<uint8_t width>
00298 typename lcp_bitcompressed<width>::const_iterator lcp_bitcompressed<width>::begin()const
00299 {
00300     return const_iterator(this, 0);
00301 }
00302 
00303 template<uint8_t width>
00304 typename lcp_bitcompressed<width>::const_iterator lcp_bitcompressed<width>::end()const
00305 {
00306     return const_iterator(this, size());
00307 }
00308 
00309 } // end namespace sdsl
00310 
00311 #endif