SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/lcp_vlc.hpp
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00017 /* \file lcp_vlc.hpp
00018     \brief lcp_vlc.hpp contains an implementation of a (compressed) lcp array.
00019         \author Simon Gog
00020 */
00021 #ifndef INCLUDED_SDSL_LCP_VLC
00022 #define INCLUDED_SDSL_LCP_VLC
00023 
00024 #include "lcp.hpp"
00025 #include "vlc_vector.hpp"
00026 #include "int_vector.hpp"
00027 #include "algorithms.hpp"
00028 #include "iterators.hpp"
00029 #include "bitmagic.hpp"
00030 #include <iostream>
00031 #include <algorithm> // for lower_bound
00032 #include <cassert>
00033 #include <cstring> // for strlen
00034 #include <iomanip>
00035 #include <iterator>
00036 #include <vector>
00037 #include <utility> // for pair
00038 
00039 namespace sdsl
00040 {
00041 
00042 // A class for the compressed version of lcp information of an suffix array
00043 /* We use variable length code for the lcp entries.
00044  *  \par Time complexity
00045  *              - \f$\Order{d}\f$, where d is the sample density
00046  */
00047 template<class vlc_vec_type = vlc_vector<> >
00048 class lcp_vlc
00049 {
00050     public:
00051         typedef typename vlc_vec_type::value_type                value_type;    // STL Container requirement
00052         typedef random_access_const_iterator<lcp_vlc>            const_iterator;// STL Container requirement
00053         typedef const_iterator                                                           iterator;              // STL Container requirement
00054         typedef const value_type                                                         const_reference;
00055         typedef const_reference                                                          reference;
00056         typedef const_reference*                                                         pointer;
00057         typedef const pointer                                                            const_pointer;
00058         typedef typename vlc_vec_type::size_type                         size_type;             // STL Container requirement
00059         typedef typename vlc_vec_type::difference_type           difference_type; // STL Container requirement
00060 
00061         typedef lcp_plain_tag                                                            lcp_category;
00062 
00063         enum { fast_access = 0,
00064                text_order  = 0,
00065                sa_order   = 1
00066              };
00067 
00068     private:
00069 
00070         vlc_vec_type            m_vec;
00071 
00072         void copy(const lcp_vlc& lcp_c) {
00073             m_vec = lcp_c.m_vec;
00074         }
00075 
00076     public:
00078         lcp_vlc() {}
00080         ~lcp_vlc() {}
00082         lcp_vlc(const lcp_vlc& lcp_c) {
00083             copy(lcp_c);
00084         }
00085 
00087         template<class Text, class Sa>
00088         lcp_vlc(const Text& text, const Sa& sa);
00089 
00091         template<uint8_t int_width, class size_type_class>
00092         lcp_vlc(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00093 
00094         template<class Text, class Sa>
00095         void construct(const Text& text, const Sa& sa);
00096 
00097         template<uint8_t int_width, class size_type_class>
00098         void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00099 
00101 
00104         size_type size()const {
00105             return m_vec.size();
00106         }
00107 
00109 
00112         static size_type max_size() {
00113             return vlc_vec_type::max_size();
00114         }
00115 
00117 
00120         bool empty()const {
00121             return m_vec.empty();
00122         }
00123 
00125 
00133         void swap(lcp_vlc& lcp_c);
00134 
00136 
00139         const_iterator begin()const;
00140 
00142 
00145         const_iterator end()const;
00146 
00148 
00152         inline value_type operator[](size_type i)const;
00153 
00155 
00158         lcp_vlc& operator=(const lcp_vlc& lcp_c);
00159 
00161 
00166         bool operator==(const lcp_vlc& lcp_c)const;
00167 
00169 
00174         bool operator!=(const lcp_vlc& lcp_c)const;
00175 
00177 
00180         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00181 
00183 
00185         void load(std::istream& in);
00186 };
00187 
00188 // == template functions ==
00189 
00190 template<class vlc_vec_type>
00191 template<class Text, class Sa>
00192 lcp_vlc<vlc_vec_type>::lcp_vlc(const Text& text, const Sa& sa)
00193 {
00194     construct(text, sa);
00195 }
00196 
00197 template<class vlc_vec_type>
00198 template<class Text, class Sa>
00199 void lcp_vlc<vlc_vec_type>::construct(const Text& text, const Sa& sa)
00200 {
00201     if (sa.size() == 0) {
00202         return;
00203     }
00204     int_vector<> lcp;
00205     lcp.set_int_width(bit_magic::l1BP(sa.size()) + 1);
00206     lcp.resize(sa.size());
00207 
00208     // use Kasai algorithm to compute the lcp values
00209     typename Sa::size_type sa_1 = sa(0), l=0, max_l=0, max_big_idx=0;
00210     for (typename Sa::size_type i=0,j=0; i < sa.size(); ++i) {
00211         if (l) --l;
00212         if (sa_1) {
00213             j = sa[sa_1-1];
00214             while (i+l < sa.size() and j+l < sa.size() and text[i+l]==text[j+l]) ++l;
00215         } else {
00216             l = 0;
00217         }
00218         lcp[ sa_1 ] = l;
00219         sa_1 = sa.psi[sa_1];
00220     }
00221     m_vec = vlc_vec_type(lcp);
00222 }
00223 
00224 
00225 template<class vlc_vec_type>
00226 template<uint8_t int_width, class size_type_class>
00227 lcp_vlc<vlc_vec_type>::lcp_vlc(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00228 {
00229     construct(lcp_buf);
00230 }
00231 
00232 template<class vlc_vec_type>
00233 template<uint8_t int_width, class size_type_class>
00234 void lcp_vlc<vlc_vec_type>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00235 {
00236     m_vec.init(lcp_buf);
00237 }
00238 
00239 template<class vlc_vec_type>
00240 void lcp_vlc<vlc_vec_type>::swap(lcp_vlc& lcp_c)
00241 {
00242     m_vec.swap(lcp_c.m_vec);
00243 }
00244 
00245 template<class vlc_vec_type>
00246 inline typename lcp_vlc<vlc_vec_type>::value_type lcp_vlc<vlc_vec_type>::operator[](size_type i)const
00247 {
00248     return m_vec[i];
00249 }
00250 
00251 
00252 template<class vlc_vec_type>
00253 typename lcp_vlc<vlc_vec_type>::size_type lcp_vlc<vlc_vec_type>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
00254 {
00255     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00256     size_type written_bytes = 0;
00257     written_bytes += m_vec.serialize(out, child, "vec");
00258     structure_tree::add_size(child, written_bytes);
00259     return written_bytes;
00260 }
00261 
00262 template<class vlc_vec_type>
00263 void lcp_vlc<vlc_vec_type>::load(std::istream& in)
00264 {
00265     m_vec.load(in);
00266 }
00267 
00268 
00269 template<class vlc_vec_type>
00270 lcp_vlc<vlc_vec_type>& lcp_vlc<vlc_vec_type>::operator=(const lcp_vlc& lcp_c)
00271 {
00272     if (this != &lcp_c) {
00273         copy(lcp_c);
00274     }
00275     return *this;
00276 }
00277 
00278 
00279 template<class vlc_vec_type>
00280 bool lcp_vlc<vlc_vec_type>::operator==(const lcp_vlc& lcp_c)const
00281 {
00282     if (this == &lcp_c)
00283         return true;
00284     return m_vec == lcp_c.m_vec;
00285 }
00286 
00287 template<class vlc_vec_type>
00288 bool lcp_vlc<vlc_vec_type>::operator!=(const lcp_vlc& lcp_c)const
00289 {
00290     return !(*this == lcp_c);
00291 }
00292 
00293 template<class vlc_vec_type>
00294 typename lcp_vlc<vlc_vec_type>::const_iterator lcp_vlc<vlc_vec_type>::begin()const
00295 {
00296     return const_iterator(this, 0);
00297 }
00298 
00299 template<class vlc_vec_type>
00300 typename lcp_vlc<vlc_vec_type>::const_iterator lcp_vlc<vlc_vec_type>::end()const
00301 {
00302     return const_iterator(this, size());
00303 }
00304 
00305 
00306 
00307 } // end namespace sdsl
00308 
00309 #endif