SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/lcp_wt.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_LCP_WT
00022 #define INCLUDED_SDSL_LCP_WT
00023 
00024 #include "lcp.hpp"
00025 #include "int_vector.hpp"
00026 #include "algorithms.hpp"
00027 #include "iterators.hpp"
00028 #include "bitmagic.hpp"
00029 #include "wt_huff.hpp"
00030 #include "select_support_bs.hpp" // dummy select support for wavelet tree, as we don't use select in this application
00031 #include "util.hpp"
00032 #include "testutils.hpp"
00033 #include <iostream>
00034 #include <algorithm> // for lower_bound
00035 #include <cassert>
00036 #include <cstring> // for strlen
00037 #include <iomanip>
00038 #include <iterator>
00039 #include <vector>
00040 #include <utility> // for pair
00041 #include <stdexcept>
00042 
00043 namespace sdsl
00044 {
00045 
00047 
00052 template<uint8_t width=0>
00053 class lcp_wt
00054 {
00055     public:
00056         typedef typename int_vector<width>::value_type           value_type;    // STL Container requirement
00057         typedef random_access_const_iterator<lcp_wt>             const_iterator;// STL Container requirement
00058         typedef const_iterator                                                           iterator;              // STL Container requirement
00059         typedef const value_type                                                         const_reference;
00060         typedef const_reference                                                          reference;
00061         typedef const_reference*                                                         pointer;
00062         typedef const pointer                                                            const_pointer;
00063         typedef int_vector<>::size_type                                          size_type;             // STL Container requirement
00064         typedef ptrdiff_t                                                                        difference_type; // STL Container requirement
00065 
00066         typedef lcp_plain_tag                                                            lcp_category;
00067 
00068         enum { fast_access = 0,
00069                text_order  = 0,
00070                sa_order   = 1
00071              }; // as the lcp_wt is not fast for texts with long repetition
00072 
00073         template<class Cst>  // template inner class which is used in CSTs to parametrize lcp classes
00074         class type           // with information about the CST. Thanks Stefan Arnold! (2011-03-02)
00075         {
00076             public:
00077                 typedef lcp_wt lcp_type;
00078         };
00079 
00080     private:
00081         typedef select_support_bs< rank_support_v<> > tDummySS;
00082         wt_huff<bit_vector, rank_support_v<>, tDummySS, tDummySS > m_small_lcp; // vector for lcp values < 255
00083         int_vector<width>   m_big_lcp;          // vector for lcp values > 254
00084 
00085         typedef std::pair<size_type, size_type> tPII;
00086         typedef std::vector<tPII> tVPII;
00087 
00088         void copy(const lcp_wt& lcp_c) {
00089             m_small_lcp         = lcp_c.m_small_lcp;
00090             m_big_lcp           = lcp_c.m_big_lcp;
00091         }
00092 
00093     public:
00095         lcp_wt() {}
00097         ~lcp_wt() {}
00099         lcp_wt(const lcp_wt& lcp_c) {
00100             copy(lcp_c);
00101         }
00102 
00104         template<class Text, class Sa>
00105         lcp_wt(const Text& text, const Sa& sa);
00106 
00108         template<uint8_t int_width, class size_type_class>
00109         lcp_wt(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00110 
00111         template<class Text, class Sa>
00112         void construct(const Text& text, const Sa& sa);
00113 
00114         template<uint8_t int_width, class size_type_class>
00115         void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf);
00116 
00118 
00121         size_type size()const {
00122             return m_small_lcp.size();
00123         }
00124 
00126 
00129         static size_type max_size() {
00130             return int_vector<8>::max_size();
00131         }
00132 
00134 
00137         bool empty()const {
00138             return m_small_lcp.size()==0;
00139         }
00140 
00142 
00150         void swap(lcp_wt& lcp_c);
00151 
00153 
00156         const_iterator begin()const;
00157 
00159 
00162         const_iterator end()const;
00163 
00165 
00169         inline value_type operator[](size_type i)const;
00170 
00172 
00175         lcp_wt& operator=(const lcp_wt& lcp_c);
00176 
00178 
00183         bool operator==(const lcp_wt& lcp_c)const;
00184 
00186 
00191         bool operator!=(const lcp_wt& lcp_c)const;
00192 
00194 
00197         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00198 
00200 
00202         void load(std::istream& in);
00203 };
00204 
00205 // == template functions ==
00206 
00207 template<uint8_t width>
00208 template<class Text, class Sa>
00209 lcp_wt<width>::lcp_wt(const Text& text, const Sa& sa)
00210 {
00211     construct(text, sa);
00212 }
00213 
00214 template<uint8_t width>
00215 template<class Text, class Sa>
00216 void lcp_wt<width>::construct(const Text& text, const Sa& sa)
00217 {
00218     if (sa.size() == 0) {
00219         return;
00220     }
00221     throw std::logic_error("This constructor of lcp_wt is not yet implemented!");
00222     /*
00223     */
00224 }
00225 
00226 
00227 template<uint8_t width>
00228 template<uint8_t int_width, class size_type_class>
00229 lcp_wt<width>::lcp_wt(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00230 {
00231     construct(lcp_buf);
00232 }
00233 
00234 template<uint8_t width>
00235 template<uint8_t int_width, class size_type_class>
00236 void lcp_wt<width>::construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf)
00237 {
00238     std::string temp_file = "/tmp/lcp_sml" + util::to_string(util::get_pid()) + "_" + util::to_string(util::get_id()) ;// TODO: remove absolute file name
00239 //      write_R_output("lcp","construct sml","begin");
00240     typename int_vector<>::size_type l=0, max_l=0, big_sum=0, n = lcp_buf.int_vector_size;
00241     {
00242         int_vector<8> small_lcp = int_vector<8>(n);
00243         lcp_buf.reset();
00244         for (size_type i=0, r_sum=0, r = lcp_buf.load_next_block(); r_sum < n;) {
00245             for (; i < r_sum+r; ++i) {
00246                 if ((l=lcp_buf[i-r_sum]) < 255) {
00247                     small_lcp[i] = l;
00248                 } else {
00249                     small_lcp[i] = 255;
00250                     if (l > max_l) max_l = l;
00251                     ++big_sum;
00252                 }
00253             }
00254             r_sum += r; r = lcp_buf.load_next_block();
00255         }
00256         util::store_to_file(small_lcp, temp_file.c_str());
00257     }
00258 //      write_R_output("lcp","construct sml","end");
00259     int_vector_file_buffer<8> lcp_sml_buf(temp_file.c_str());
00260     m_small_lcp.construct(lcp_sml_buf, lcp_sml_buf.int_vector_size);
00261     std::remove(temp_file.c_str());
00262 //      write_R_output("lcp","construct big","begin");
00263     m_big_lcp           = int_vector<>(big_sum, 0, bit_magic::l1BP(max_l)+1);
00264     {
00265         lcp_buf.reset();
00266         for (size_type i=0, ii=0, r_sum=0, r = lcp_buf.load_next_block(); r_sum < n;) {
00267             for (; i < r_sum+r; ++i) {
00268                 if (lcp_buf[i-r_sum] >= 255) {
00269                     m_big_lcp[ ii++ ] = lcp_buf[i-r_sum];
00270                 }
00271             }
00272             r_sum += r; r = lcp_buf.load_next_block();
00273         }
00274     }
00275 //      write_R_output("lcp","construct big","end");
00276 }
00277 
00278 template<uint8_t width>
00279 void lcp_wt<width>::swap(lcp_wt& lcp_c)
00280 {
00281     m_small_lcp.swap(lcp_c.m_small_lcp);
00282     m_big_lcp.swap(lcp_c.m_big_lcp);
00283 }
00284 
00285 template<uint8_t width>
00286 inline typename lcp_wt<width>::value_type lcp_wt<width>::operator[](size_type i)const
00287 {
00288     if (m_small_lcp[i]!=255) {
00289         return m_small_lcp[i];
00290     } else {
00291         return m_big_lcp[ m_small_lcp.rank(i, 255) ];
00292     }
00293 }
00294 
00295 
00296 template<uint8_t width>
00297 typename lcp_wt<width>::size_type lcp_wt<width>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
00298 {
00299     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00300     size_type written_bytes = 0;
00301     written_bytes += m_small_lcp.serialize(out, child,  "small_lcp");
00302     written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
00303     structure_tree::add_size(child, written_bytes);
00304     return written_bytes;
00305 }
00306 
00307 template<uint8_t width>
00308 void lcp_wt<width>::load(std::istream& in)
00309 {
00310     m_small_lcp.load(in);
00311     m_big_lcp.load(in);
00312 }
00313 
00314 
00315 template<uint8_t width>
00316 lcp_wt<width>& lcp_wt<width>::operator=(const lcp_wt& lcp_c)
00317 {
00318     if (this != &lcp_c) {
00319         copy(lcp_c);
00320     }
00321     return *this;
00322 }
00323 
00324 
00325 template<uint8_t width>
00326 bool lcp_wt<width>::operator==(const lcp_wt& lcp_c)const
00327 {
00328     if (this == &lcp_c)
00329         return true;
00330     return m_small_lcp == lcp_c.m_small_lcp and m_big_lcp == lcp_c.m_big_lcp;
00331 }
00332 
00333 template<uint8_t width>
00334 bool lcp_wt<width>::operator!=(const lcp_wt& lcp_c)const
00335 {
00336     return !(*this == lcp_c);
00337 }
00338 
00339 template<uint8_t width>
00340 typename lcp_wt<width>::const_iterator lcp_wt<width>::begin()const
00341 {
00342     return const_iterator(this, 0);
00343 }
00344 
00345 template<uint8_t width>
00346 typename lcp_wt<width>::const_iterator lcp_wt<width>::end()const
00347 {
00348     return const_iterator(this, size());
00349 }
00350 
00351 
00352 
00353 } // end namespace sdsl
00354 
00355 #endif