SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/lcp_support_tree2.hpp
00001 #ifndef INCLUDED_SDSL_SUPPORT_TREE2
00002 #define INCLUDED_SDSL_SUPPORT_TREE2
00003 
00004 #include "lcp.hpp"
00005 #include "util.hpp"
00006 #include "algorithms_for_compressed_suffix_trees.hpp"
00007 #include "rank_support_v.hpp"
00008 #include "select_support_dummy.hpp"
00009 #include "wt_huff.hpp"
00010 #include <iostream>
00011 #include <string>
00012 
00013 namespace sdsl
00014 {
00015 
00016 
00024 template<uint32_t SampleDens, class Cst>
00025 class _lcp_support_tree2
00026 {
00027     public:
00028         typedef int_vector<>::value_type                                value_type;      // STL Container requirement
00029         typedef random_access_const_iterator<_lcp_support_tree2>        const_iterator;  // STL Container requirement
00030         typedef const_iterator                                                                  iterator;                // STL Container requirement
00031         typedef const value_type                                                                const_reference;
00032         typedef const_reference                                                                 reference;
00033         typedef const_reference*                                                                pointer;
00034         typedef const pointer                                                                   const_pointer;
00035         typedef int_vector<>::size_type                                                 size_type;               // STL Container requirement
00036         typedef int_vector<>::difference_type                                   difference_type; // STL Container requirement
00037         typedef Cst                                                                                             cst_type;
00038 
00039         typedef lcp_tree_and_lf_compressed_tag                                  lcp_category;
00040 
00041         enum { fast_access = 0,
00042                text_order = 0,
00043                sa_order = 0
00044              };
00045 
00046         template<class CST>  // template inner class which is used in CSTs to parametrize lcp classes
00047         class type           // with information about the CST. Thanks Stefan Arnold! (2011-03-02)
00048         {
00049             public:
00050                 typedef _lcp_support_tree2 lcp_type;
00051         };
00052 
00053     private:
00054         const cst_type* m_cst;
00055         typedef select_support_dummy tDummySS;
00056         wt_huff<bit_vector, rank_support_v5<>, tDummySS, tDummySS > m_small_lcp; // vector for lcp values < 254
00057         int_vector<> m_big_lcp;  // vector for lcp values >= 254
00058 
00059         void copy(const _lcp_support_tree2& lcp_c) {
00060             m_small_lcp = lcp_c.m_small_lcp;
00061             m_big_lcp = lcp_c.m_big_lcp;
00062             m_cst = lcp_c.m_cst;
00063         }
00064 
00065     public:
00066 
00068         _lcp_support_tree2() {}
00069 
00070         // Destructor
00071         ~_lcp_support_tree2() {}
00072 
00074         _lcp_support_tree2(const _lcp_support_tree2& lcp) {
00075             copy(lcp);
00076         }
00077 
00081         template<class Text, class Sa>
00082         void construct(const Text& text, const Sa& sa, const cst_type* cst);
00083 
00084 
00086 
00089         template<uint8_t int_width, class size_type_class>
00090         void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf,
00091                        int_vector_file_buffer<8, size_type_class>& bwt_buf,
00092                        const cst_type* cst = NULL) {
00093             m_cst = cst;
00094             std::string small_lcp_file_name =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str() + "_fc_lf_lcp_sml";
00095             std::string big_lcp_file_name =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str() + "_fc_lf_lcp_big";
00096 
00097             algorithm::construct_first_child_and_lf_lcp<SampleDens>(lcp_buf, bwt_buf,
00098                     small_lcp_file_name, big_lcp_file_name, m_big_lcp);
00099             // construct wavelet tree huffman from file buffer
00100             int_vector_file_buffer<8> small_lcp_buf(small_lcp_file_name.c_str());
00101             m_small_lcp.construct(small_lcp_buf, small_lcp_buf.int_vector_size);
00102             std::remove(small_lcp_file_name.c_str());
00103         }
00104 
00105         void set_cst(const cst_type* cst) {
00106             m_cst = cst;
00107         }
00108 
00109         size_type size()const {
00110             return m_cst->size(); // corresponds to the length of the
00111             // original lcp array
00112         }
00113 
00114         static size_type max_size() {
00115             return int_vector<>::max_size();
00116         }
00117 
00118         size_type empty()const {
00119             return m_small_lcp.empty();
00120         }
00121 
00122         void swap(_lcp_support_tree2& lcp_c) {
00123             m_small_lcp.swap(lcp_c.m_small_lcp);
00124             m_big_lcp.swap(lcp_c.m_big_lcp);
00125         }
00126 
00128 
00131         const_iterator begin()const {
00132             return const_iterator(this, 0);
00133         }
00134 
00136 
00139         const_iterator end()const {
00140             return const_iterator(this, size());
00141         }
00142 
00144 
00150         inline value_type operator[](size_type i)const {
00151             size_type idx, offset=0;
00152             uint8_t val;
00153 start:
00154             idx = m_cst->tlcp_idx(i);
00155             val = m_small_lcp[idx];
00156             if (val < 254) {
00157                 return val;// - offset;
00158             } else if (val == 254) { // if lcp value is >= 254 and position i is reducible
00159                 i = m_cst->csa.psi(i); // i = LF[i]    // (*m_psi)(i);
00160                 ++offset; // goto lcp value, which is one bigger
00161                 goto start;
00162             } else { // if lcp value is >= 254 and (not reducable or sampled)
00163                 return m_big_lcp[m_small_lcp.rank(idx ,255)] - offset;
00164             }
00165         }
00166 
00168 
00171         _lcp_support_tree2& operator=(const _lcp_support_tree2& lcp_c) {
00172             if (this != &lcp_c) {
00173                 copy(lcp_c);
00174             }
00175             return *this;
00176         }
00177 
00179 
00184         bool operator==(const _lcp_support_tree2& lcp_c)const {
00185             if (this == &lcp_c)
00186                 return true;
00187             return m_cst == lcp_c.m_cst and
00188                    m_small_lcp == lcp_c.m_small_lcp and
00189                    m_big_lcp == lcp_c.m_big_lcp;
00190         }
00191 
00193 
00198         bool operator!=(const _lcp_support_tree2& lcp_c)const {
00199             return !(*this == lcp_c);
00200         }
00201 
00203 
00206         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00207             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00208             size_type written_bytes = 0;
00209             written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
00210             written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
00211             structure_tree::add_size(child, written_bytes);
00212             return written_bytes;
00213         }
00214 
00216 
00220         void load(std::istream& in, const Cst* cst=NULL) {
00221             m_small_lcp.load(in);
00222             m_big_lcp.load(in);
00223             m_cst = cst;
00224         }
00225 };
00226 
00228 template<uint32_t SampleDens=16>
00229 class lcp_support_tree2
00230 {
00231     public:
00232         template<class Cst>  // template inner class which is used in CSTs to parametrize lcp classes
00233         class type           // with information about the CST. Thanks Stefan Arnold! (2011-03-02)
00234         {
00235             public:
00236                 typedef _lcp_support_tree2<SampleDens, Cst> lcp_type;
00237         };
00238 };
00239 
00240 
00241 
00242 } // end namespace
00243 
00244 #endif