SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 #ifndef INCLUDED_SDSL_SUPPORT_TREE2 00002 #define INCLUDED_SDSL_SUPPORT_TREE2 00003 00004 #include "lcp.hpp" 00005 #include "util.hpp" 00006 #include "algorithms_for_compressed_suffix_trees.hpp" 00007 #include "rank_support_v.hpp" 00008 #include "select_support_dummy.hpp" 00009 #include "wt_huff.hpp" 00010 #include <iostream> 00011 #include <string> 00012 00013 namespace sdsl 00014 { 00015 00016 00024 template<uint32_t SampleDens, class Cst> 00025 class _lcp_support_tree2 00026 { 00027 public: 00028 typedef int_vector<>::value_type value_type; // STL Container requirement 00029 typedef random_access_const_iterator<_lcp_support_tree2> const_iterator; // STL Container requirement 00030 typedef const_iterator iterator; // STL Container requirement 00031 typedef const value_type const_reference; 00032 typedef const_reference reference; 00033 typedef const_reference* pointer; 00034 typedef const pointer const_pointer; 00035 typedef int_vector<>::size_type size_type; // STL Container requirement 00036 typedef int_vector<>::difference_type difference_type; // STL Container requirement 00037 typedef Cst cst_type; 00038 00039 typedef lcp_tree_and_lf_compressed_tag lcp_category; 00040 00041 enum { fast_access = 0, 00042 text_order = 0, 00043 sa_order = 0 00044 }; 00045 00046 template<class CST> // template inner class which is used in CSTs to parametrize lcp classes 00047 class type // with information about the CST. Thanks Stefan Arnold! (2011-03-02) 00048 { 00049 public: 00050 typedef _lcp_support_tree2 lcp_type; 00051 }; 00052 00053 private: 00054 const cst_type* m_cst; 00055 typedef select_support_dummy tDummySS; 00056 wt_huff<bit_vector, rank_support_v5<>, tDummySS, tDummySS > m_small_lcp; // vector for lcp values < 254 00057 int_vector<> m_big_lcp; // vector for lcp values >= 254 00058 00059 void copy(const _lcp_support_tree2& lcp_c) { 00060 m_small_lcp = lcp_c.m_small_lcp; 00061 m_big_lcp = lcp_c.m_big_lcp; 00062 m_cst = lcp_c.m_cst; 00063 } 00064 00065 public: 00066 00068 _lcp_support_tree2() {} 00069 00070 // Destructor 00071 ~_lcp_support_tree2() {} 00072 00074 _lcp_support_tree2(const _lcp_support_tree2& lcp) { 00075 copy(lcp); 00076 } 00077 00081 template<class Text, class Sa> 00082 void construct(const Text& text, const Sa& sa, const cst_type* cst); 00083 00084 00086 00089 template<uint8_t int_width, class size_type_class> 00090 void construct(int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00091 int_vector_file_buffer<8, size_type_class>& bwt_buf, 00092 const cst_type* cst = NULL) { 00093 m_cst = cst; 00094 std::string small_lcp_file_name = util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str() + "_fc_lf_lcp_sml"; 00095 std::string big_lcp_file_name = util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str() + "_fc_lf_lcp_big"; 00096 00097 algorithm::construct_first_child_and_lf_lcp<SampleDens>(lcp_buf, bwt_buf, 00098 small_lcp_file_name, big_lcp_file_name, m_big_lcp); 00099 // construct wavelet tree huffman from file buffer 00100 int_vector_file_buffer<8> small_lcp_buf(small_lcp_file_name.c_str()); 00101 m_small_lcp.construct(small_lcp_buf, small_lcp_buf.int_vector_size); 00102 std::remove(small_lcp_file_name.c_str()); 00103 } 00104 00105 void set_cst(const cst_type* cst) { 00106 m_cst = cst; 00107 } 00108 00109 size_type size()const { 00110 return m_cst->size(); // corresponds to the length of the 00111 // original lcp array 00112 } 00113 00114 static size_type max_size() { 00115 return int_vector<>::max_size(); 00116 } 00117 00118 size_type empty()const { 00119 return m_small_lcp.empty(); 00120 } 00121 00122 void swap(_lcp_support_tree2& lcp_c) { 00123 m_small_lcp.swap(lcp_c.m_small_lcp); 00124 m_big_lcp.swap(lcp_c.m_big_lcp); 00125 } 00126 00128 00131 const_iterator begin()const { 00132 return const_iterator(this, 0); 00133 } 00134 00136 00139 const_iterator end()const { 00140 return const_iterator(this, size()); 00141 } 00142 00144 00150 inline value_type operator[](size_type i)const { 00151 size_type idx, offset=0; 00152 uint8_t val; 00153 start: 00154 idx = m_cst->tlcp_idx(i); 00155 val = m_small_lcp[idx]; 00156 if (val < 254) { 00157 return val;// - offset; 00158 } else if (val == 254) { // if lcp value is >= 254 and position i is reducible 00159 i = m_cst->csa.psi(i); // i = LF[i] // (*m_psi)(i); 00160 ++offset; // goto lcp value, which is one bigger 00161 goto start; 00162 } else { // if lcp value is >= 254 and (not reducable or sampled) 00163 return m_big_lcp[m_small_lcp.rank(idx ,255)] - offset; 00164 } 00165 } 00166 00168 00171 _lcp_support_tree2& operator=(const _lcp_support_tree2& lcp_c) { 00172 if (this != &lcp_c) { 00173 copy(lcp_c); 00174 } 00175 return *this; 00176 } 00177 00179 00184 bool operator==(const _lcp_support_tree2& lcp_c)const { 00185 if (this == &lcp_c) 00186 return true; 00187 return m_cst == lcp_c.m_cst and 00188 m_small_lcp == lcp_c.m_small_lcp and 00189 m_big_lcp == lcp_c.m_big_lcp; 00190 } 00191 00193 00198 bool operator!=(const _lcp_support_tree2& lcp_c)const { 00199 return !(*this == lcp_c); 00200 } 00201 00203 00206 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00207 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00208 size_type written_bytes = 0; 00209 written_bytes += m_small_lcp.serialize(out, child, "small_lcp"); 00210 written_bytes += m_big_lcp.serialize(out, child, "large_lcp"); 00211 structure_tree::add_size(child, written_bytes); 00212 return written_bytes; 00213 } 00214 00216 00220 void load(std::istream& in, const Cst* cst=NULL) { 00221 m_small_lcp.load(in); 00222 m_big_lcp.load(in); 00223 m_cst = cst; 00224 } 00225 }; 00226 00228 template<uint32_t SampleDens=16> 00229 class lcp_support_tree2 00230 { 00231 public: 00232 template<class Cst> // template inner class which is used in CSTs to parametrize lcp classes 00233 class type // with information about the CST. Thanks Stefan Arnold! (2011-03-02) 00234 { 00235 public: 00236 typedef _lcp_support_tree2<SampleDens, Cst> lcp_type; 00237 }; 00238 }; 00239 00240 00241 00242 } // end namespace 00243 00244 #endif