SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/suffixtrees.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_SUFFIXTREES
00022 #define INCLUDED_SDSL_SUFFIXTREES
00023 
00024 #include "sdsl_concepts.hpp"
00025 #include "util.hpp"
00026 #include <iostream>
00027 #include <cmath>
00028 
00029 using std::cout;
00030 using std::endl;
00031 
00032 namespace sdsl
00033 {
00034 
00036 
00042 template<class tCst>
00043 double H0(const typename tCst::node_type& v, const tCst& cst)
00044 {
00045     if (cst.is_leaf(v)) {
00046         return 0;
00047     } else {
00048         double h0=0;
00049         typename tCst::size_type n = cst.leaves_in_the_subtree(v);
00050         typename tCst::node_type w = cst.ith_child(v, 1);
00051         do {
00052             double p = ((double)cst.leaves_in_the_subtree(w))/n;
00053             h0 -= p*log2(p);
00054             w = cst.sibling(w);
00055         } while (w != cst.root());
00056         return h0;
00057     }
00058 }
00059 
00061 
00066 template<class tCst>
00067 double Hk(const tCst& cst, typename tCst::size_type k, typename tCst::size_type& context)
00068 {
00069     double hk = 0;
00070     context = 0;
00071     for (typename tCst::const_iterator it = cst.begin(), end=cst.end(); it != end; ++it) {
00072         if (it.visit() == 1) {
00073             typename tCst::size_type d = cst.depth(*it);
00074             if (d >= k) {
00075                 if (d == k)
00076                     hk += cst.leaves_in_the_subtree(*it) * H0(*it, cst);
00077                 ++context;
00078                 it.skip_subtree();
00079             }
00080         }
00081     }
00082     hk /= cst.size();
00083     return hk;
00084 }
00085 
00086 
00087 // Gets ISA[SA[idx]+d]
00088 // d = depth of the character 0 = first position
00089 template<class Csa>
00090 typename Csa::size_type get_char_pos(typename Csa::size_type idx, typename Csa::size_type d, const Csa& csa)
00091 {
00092     if (d == 0)
00093         return idx;
00094     // if we have to apply \f$\LF\f$ or \f$\Phi\f$ more
00095     // than 2*d times to calc csa(csa[idx]+d), we opt to
00096     // apply \f$ \Phi \f$ d times
00097     if ((csa.sa_sample_dens - 1) + (csa.isa_sample_dens - 1) > 2*d) {
00098         for (typename Csa::size_type i=0; i < d; ++i)
00099             idx = csa.psi[idx];
00100         return idx;
00101     }
00102     return csa(csa[idx] + d);
00103 }
00104 
00105 // output some informations about the constructed compressed suffix tree
00106 template<class Cst>
00107 void cst_info(const Cst& cst)
00108 {
00109     double csa_size, lcp_size, cst_size, nav_size, text_size;
00110     cout << "# alphabet size: " << (int)cst.csa.sigma << endl;
00111     cout << "# text size: " << (text_size = cst.csa.size()) << endl;
00112     cout << "# nodes: " << cst.nodes() << endl;
00113     cout << "# cst in MB: " << (cst_size=(((double)util::get_size_in_bytes(cst))/(1024.0*1024.0))) << endl;
00114     cout << "# csa in MB: " << (csa_size=(((double)util::get_size_in_bytes(cst.csa))/(1024.0*1024.0))) << endl;
00115     cout << "# csa samples in MB: " << ((double)util::get_size_in_bytes(cst.csa.sa_sample)+util::get_size_in_bytes(cst.csa.isa_sample))/(1024.0*1024.0) << endl;
00116     cout << "# lcp in MB: " << (lcp_size=(((double)util::get_size_in_bytes(cst.lcp))/(1024.0*1024.0))) << endl;
00117     cout << "# nav in MB: " << (nav_size=(cst_size-lcp_size-csa_size)) << endl;
00118     text_size /= 1024*1024;
00119     cout << "# "<< 8*csa_size/text_size <<" " << 8*lcp_size/text_size << " " << 8*nav_size/text_size << endl;
00120 }
00121 
00122 }
00123 
00124 
00137 #include "cst_sct.hpp"
00138 #include "cst_sct2.hpp"
00139 #include "cst_sct3.hpp"
00140 #include "cst_sada.hpp"
00141 
00142 #include "csa_uncompressed.hpp"
00143 #include "int_vector.hpp"
00144 
00145 #include "cst_construct.hpp"
00146 
00147 /*
00148 namespace sdsl{
00149 
00150 typedef cst_sct<csa_uncompressed, int_vector<0> > cst_sct_uncompressed;
00151 typedef cst_sada<csa_uncompressed, int_vector<0> > cst_sada_uncompressed;
00152 
00153 }
00154 */
00155 
00156 #include <iostream>
00157 #include <string>
00158 
00159 inline unsigned char _replace_sentinel(unsigned char c)
00160 {
00161     if (c==0)
00162         return '$';
00163     return c;
00164 }
00165 
00166 inline unsigned char _vc(unsigned char c)
00167 {
00168     if (c=='!')
00169         return '_';
00170     else
00171         return '!';
00172 }
00173 
00175 template<class Cst>
00176 void output_cst_in_tikz(const Cst& cst)
00177 {
00178     std::cout<<"\
00179 \\documentclass{article}\n\
00180 \\usepackage{tikz}\n\
00181 \\usepackage{verbatim}\n\
00182 \\begin{document}\n\
00183 \\begin{tikzpicture}\n\
00184 [scale=0.8, transform shape, inner sep=1mm, font=\\small,\n\
00185 innernode/.style={rectangle,draw=blue!50,fill=blue!20,thick,minimum width=#1,minimum height=0.5cm,rounded corners=2mm,anchor=south},\n\
00186 innernode/.default=4cm,\n\
00187 leafnode/.style={rectangle,draw=black!50,fill=black!20,thick,minimum width=#1,minimum height=0.5cm,anchor=south},\n\
00188 leafnode/.default=1cm,\n\
00189 ]\n";
00190 
00191     typedef typename Cst::node_type node_type;
00192     typedef typename Cst::size_type size_type;
00193     for (typename Cst::iterator it = cst.begin(); it!=cst.end(); ++it) {
00194         if (it.visit()==1) {
00195             node_type v = *it;
00196             double f = 1;//1.5;
00197             double fy = 0.9;//1.3;
00198             std::string style = cst.is_leaf(v) ? "leafnode" : "innernode";
00199             double xpos = (cst.rb(v) + cst.lb(v));
00200             double ypos = cst.depth(v);
00201             std::cout<<"\\node["<<style<<"="<<f* cst.leaves_in_the_subtree(v)-0.2<<"cm] (node "<<ypos<<"x"<<xpos<<") at ("<<f* xpos/2<<","<<-fy* ypos<<") {"<<v<<"};"<<std::endl;
00202 
00203             if (v != cst.root()) {
00204                 node_type p =  cst.parent(v);
00205                 double pypos = cst.depth(p);
00206                 std::cout<<"\\draw[->] ("<<f* xpos/2<<","<<-fy* pypos<<") -- (node "<<ypos<<"x"<<xpos<<".north);"<<std::endl;
00207 
00208                 for (size_type i=cst.depth(p)+1; i <= cst.depth(*it); ++i) {
00209                     unsigned char c = _replace_sentinel(cst.edge(*it,i));
00210                     double y = -fy*(pypos -1 + i-cst.depth(p) + 0.5);
00211                     std::cout << "\\node[anchor=south east] at ("<<f* xpos/2<<","<<y<<"){\\verb"<<_vc(c)<<c<<_vc(c)<<"};"<<std::endl;
00212                 }
00213             }
00214         }
00215     }
00216     std::cout<<"\
00217 \\end{tikzpicture}\n\
00218 \\end{document}\n";
00219 }
00220 
00221 
00222 #endif