SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_SUFFIXTREES 00022 #define INCLUDED_SDSL_SUFFIXTREES 00023 00024 #include "sdsl_concepts.hpp" 00025 #include "util.hpp" 00026 #include <iostream> 00027 #include <cmath> 00028 00029 using std::cout; 00030 using std::endl; 00031 00032 namespace sdsl 00033 { 00034 00036 00042 template<class tCst> 00043 double H0(const typename tCst::node_type& v, const tCst& cst) 00044 { 00045 if (cst.is_leaf(v)) { 00046 return 0; 00047 } else { 00048 double h0=0; 00049 typename tCst::size_type n = cst.leaves_in_the_subtree(v); 00050 typename tCst::node_type w = cst.ith_child(v, 1); 00051 do { 00052 double p = ((double)cst.leaves_in_the_subtree(w))/n; 00053 h0 -= p*log2(p); 00054 w = cst.sibling(w); 00055 } while (w != cst.root()); 00056 return h0; 00057 } 00058 } 00059 00061 00066 template<class tCst> 00067 double Hk(const tCst& cst, typename tCst::size_type k, typename tCst::size_type& context) 00068 { 00069 double hk = 0; 00070 context = 0; 00071 for (typename tCst::const_iterator it = cst.begin(), end=cst.end(); it != end; ++it) { 00072 if (it.visit() == 1) { 00073 typename tCst::size_type d = cst.depth(*it); 00074 if (d >= k) { 00075 if (d == k) 00076 hk += cst.leaves_in_the_subtree(*it) * H0(*it, cst); 00077 ++context; 00078 it.skip_subtree(); 00079 } 00080 } 00081 } 00082 hk /= cst.size(); 00083 return hk; 00084 } 00085 00086 00087 // Gets ISA[SA[idx]+d] 00088 // d = depth of the character 0 = first position 00089 template<class Csa> 00090 typename Csa::size_type get_char_pos(typename Csa::size_type idx, typename Csa::size_type d, const Csa& csa) 00091 { 00092 if (d == 0) 00093 return idx; 00094 // if we have to apply \f$\LF\f$ or \f$\Phi\f$ more 00095 // than 2*d times to calc csa(csa[idx]+d), we opt to 00096 // apply \f$ \Phi \f$ d times 00097 if ((csa.sa_sample_dens - 1) + (csa.isa_sample_dens - 1) > 2*d) { 00098 for (typename Csa::size_type i=0; i < d; ++i) 00099 idx = csa.psi[idx]; 00100 return idx; 00101 } 00102 return csa(csa[idx] + d); 00103 } 00104 00105 // output some informations about the constructed compressed suffix tree 00106 template<class Cst> 00107 void cst_info(const Cst& cst) 00108 { 00109 double csa_size, lcp_size, cst_size, nav_size, text_size; 00110 cout << "# alphabet size: " << (int)cst.csa.sigma << endl; 00111 cout << "# text size: " << (text_size = cst.csa.size()) << endl; 00112 cout << "# nodes: " << cst.nodes() << endl; 00113 cout << "# cst in MB: " << (cst_size=(((double)util::get_size_in_bytes(cst))/(1024.0*1024.0))) << endl; 00114 cout << "# csa in MB: " << (csa_size=(((double)util::get_size_in_bytes(cst.csa))/(1024.0*1024.0))) << endl; 00115 cout << "# csa samples in MB: " << ((double)util::get_size_in_bytes(cst.csa.sa_sample)+util::get_size_in_bytes(cst.csa.isa_sample))/(1024.0*1024.0) << endl; 00116 cout << "# lcp in MB: " << (lcp_size=(((double)util::get_size_in_bytes(cst.lcp))/(1024.0*1024.0))) << endl; 00117 cout << "# nav in MB: " << (nav_size=(cst_size-lcp_size-csa_size)) << endl; 00118 text_size /= 1024*1024; 00119 cout << "# "<< 8*csa_size/text_size <<" " << 8*lcp_size/text_size << " " << 8*nav_size/text_size << endl; 00120 } 00121 00122 } 00123 00124 00137 #include "cst_sct.hpp" 00138 #include "cst_sct2.hpp" 00139 #include "cst_sct3.hpp" 00140 #include "cst_sada.hpp" 00141 00142 #include "csa_uncompressed.hpp" 00143 #include "int_vector.hpp" 00144 00145 #include "cst_construct.hpp" 00146 00147 /* 00148 namespace sdsl{ 00149 00150 typedef cst_sct<csa_uncompressed, int_vector<0> > cst_sct_uncompressed; 00151 typedef cst_sada<csa_uncompressed, int_vector<0> > cst_sada_uncompressed; 00152 00153 } 00154 */ 00155 00156 #include <iostream> 00157 #include <string> 00158 00159 inline unsigned char _replace_sentinel(unsigned char c) 00160 { 00161 if (c==0) 00162 return '$'; 00163 return c; 00164 } 00165 00166 inline unsigned char _vc(unsigned char c) 00167 { 00168 if (c=='!') 00169 return '_'; 00170 else 00171 return '!'; 00172 } 00173 00175 template<class Cst> 00176 void output_cst_in_tikz(const Cst& cst) 00177 { 00178 std::cout<<"\ 00179 \\documentclass{article}\n\ 00180 \\usepackage{tikz}\n\ 00181 \\usepackage{verbatim}\n\ 00182 \\begin{document}\n\ 00183 \\begin{tikzpicture}\n\ 00184 [scale=0.8, transform shape, inner sep=1mm, font=\\small,\n\ 00185 innernode/.style={rectangle,draw=blue!50,fill=blue!20,thick,minimum width=#1,minimum height=0.5cm,rounded corners=2mm,anchor=south},\n\ 00186 innernode/.default=4cm,\n\ 00187 leafnode/.style={rectangle,draw=black!50,fill=black!20,thick,minimum width=#1,minimum height=0.5cm,anchor=south},\n\ 00188 leafnode/.default=1cm,\n\ 00189 ]\n"; 00190 00191 typedef typename Cst::node_type node_type; 00192 typedef typename Cst::size_type size_type; 00193 for (typename Cst::iterator it = cst.begin(); it!=cst.end(); ++it) { 00194 if (it.visit()==1) { 00195 node_type v = *it; 00196 double f = 1;//1.5; 00197 double fy = 0.9;//1.3; 00198 std::string style = cst.is_leaf(v) ? "leafnode" : "innernode"; 00199 double xpos = (cst.rb(v) + cst.lb(v)); 00200 double ypos = cst.depth(v); 00201 std::cout<<"\\node["<<style<<"="<<f* cst.leaves_in_the_subtree(v)-0.2<<"cm] (node "<<ypos<<"x"<<xpos<<") at ("<<f* xpos/2<<","<<-fy* ypos<<") {"<<v<<"};"<<std::endl; 00202 00203 if (v != cst.root()) { 00204 node_type p = cst.parent(v); 00205 double pypos = cst.depth(p); 00206 std::cout<<"\\draw[->] ("<<f* xpos/2<<","<<-fy* pypos<<") -- (node "<<ypos<<"x"<<xpos<<".north);"<<std::endl; 00207 00208 for (size_type i=cst.depth(p)+1; i <= cst.depth(*it); ++i) { 00209 unsigned char c = _replace_sentinel(cst.edge(*it,i)); 00210 double y = -fy*(pypos -1 + i-cst.depth(p) + 0.5); 00211 std::cout << "\\node[anchor=south east] at ("<<f* xpos/2<<","<<y<<"){\\verb"<<_vc(c)<<c<<_vc(c)<<"};"<<std::endl; 00212 } 00213 } 00214 } 00215 } 00216 std::cout<<"\ 00217 \\end{tikzpicture}\n\ 00218 \\end{document}\n"; 00219 } 00220 00221 00222 #endif