SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/cst_sada.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CST_SADA
00022 #define INCLUDED_SDSL_CST_SADA
00023 
00024 #include "int_vector.hpp"
00025 #include "algorithms.hpp"
00026 #include "iterators.hpp"
00027 #include "lcp_support_sada.hpp"
00028 #include "select_support_mcl.hpp"
00029 #include "bp_support.hpp"
00030 #include "bp_support_sada.hpp"
00031 #include "csa_sada.hpp" // for std initialization of cst_sada 
00032 #include "cst_iterators.hpp"
00033 #include "cst_sct.hpp" // compressed suffix tree based on the Super-Cartesian tree for the construction
00034 #include "cst_sct3.hpp" // compressed suffix tree based on the Super-Cartesian tree for the construction
00035 #include "util.hpp"
00036 #include <iostream>
00037 #include <algorithm>
00038 #include <cassert>
00039 #include <cstring> // for strlen
00040 #include <iomanip>
00041 #include <iterator>
00042 
00043 namespace sdsl
00044 {
00045 
00047 
00067 template<class Csa = csa_sada<>,
00068          class Lcp = lcp_support_sada<>,
00069          class Bp_support = bp_support_sada<>,
00070          class Rank_support10 = rank_support_v<10,2>,
00071          class Select_support10 = select_support_mcl<10,2>       >
00072 class cst_sada
00073 {
00074     public:
00075         typedef typename Csa::value_type                                                         value_type;    // STL Container requirement
00076         typedef cst_dfs_const_forward_iterator<cst_sada>                         const_iterator;// STL Container requirement
00077 //      typedef const_iterator                                                                           iterator;              // STL Container requirement
00078         typedef cst_bottom_up_const_forward_iterator<cst_sada>           const_bottom_up_iterator;
00079 //      typedef  const_bottom_up_iterator                                                        bottom_up_iterator;
00080         typedef const value_type                                                                         const_reference;
00081         typedef const_reference                                                                          reference;
00082         typedef const_reference*                                                                         pointer;
00083         typedef const pointer                                                                            const_pointer;
00084         typedef typename Csa::size_type                                                          size_type;             // STL Container requirement
00085         typedef size_type                                                                                        cst_size_type;
00086         typedef ptrdiff_t                                                                                        difference_type; // STL Container requirement
00087         typedef Csa                                                                                                      csa_type;
00088         typedef typename Lcp::template type<cst_sada>::lcp_type          lcp_type;
00089         typedef typename Csa::pattern_type                                                       pattern_type;// TODO: replace by csa::pattern type!!!???
00090         typedef typename Csa::char_type                                                          char_type;
00091         typedef size_type                                                                                        node_type; 
00092         typedef Bp_support                                                                                       bp_support_type;
00093 
00094         typedef cst_tag                                                                                          index_category;
00095     private:
00096         Csa                                     m_csa; // suffix array
00097         lcp_type                                m_lcp; // lcp information
00098         bit_vector                              m_bp;  // balanced parentheses sequence for suffix tree
00099         bp_support_type                 m_bp_support; // support for the balanced parentheses sequence
00100         Rank_support10                  m_bp_rank10;  // rank_support for leaves, i.e. "10" bit pattern
00101         Select_support10                m_bp_select10;// select_support for leaves, i.e. "10" bit pattern
00102 
00103         /* Get the number of leaves that are in the subtree rooted at the first child of v +
00104          * number of leafs in the subtrees rooted at the children of parent(v) which precede v in the tree.
00105          */
00106         size_type inorder(node_type v)const {
00107             return m_bp_rank10(m_bp_support.find_close(v+1)+1);
00108         }
00109 
00110         bool generate_bp(const cst_sct<Csa, Lcp>& temp_cst, const unsigned char* str) {
00111             typedef typename cst_sct<Csa, Lcp>::node_type cst_sct_node_type;
00112             m_bp.resize(4*temp_cst.csa.size());
00113             util::set_zero_bits(m_bp);
00114             if (temp_cst.csa.size() == 0)
00115                 return false;
00116             size_type cnt = 0;
00117             std::stack<cst_sct_node_type> right_most_path;
00118             std::stack<bool> used;
00119             cst_sct_node_type w = temp_cst.root();
00120             used.push(false);
00121             right_most_path.push(w);
00122             while (!right_most_path.empty()) {
00123                 if (used.top()) {
00124                     cnt++; // bits are already set to 0: m_bp[cnt++] = 0;
00125                     cst_sct_node_type v = right_most_path.top();
00126                     used.pop(); right_most_path.pop();
00127                     cst_sct_node_type sibling = temp_cst.sibling(v);
00128                     if (sibling != w) {
00129                         right_most_path.push(sibling);
00130                         used.push(false);
00131                     }
00132                 } else {
00133                     m_bp[cnt++] = 1;
00134                     used.top() = true;
00135                     cst_sct_node_type child = temp_cst.ith_child(right_most_path.top(), 1);
00136                     if (child != w) {
00137                         used.push(false);
00138                         right_most_path.push(child);
00139                     }
00140                 }
00141             }
00142             m_bp.resize(cnt);
00143             m_csa = Csa(temp_cst.csa, str);
00144             copy_lcp(m_lcp, temp_cst.m_lcp, *this);
00145             return true;
00146         }
00147 
00148         void copy(const cst_sada& cst) {
00149             m_csa                       = cst.m_csa;
00150             copy_lcp(m_lcp, cst.m_lcp, *this);
00151             m_bp                        = cst.m_bp;
00152             m_bp_support        = cst.m_bp_support;
00153             m_bp_support.set_vector(&m_bp);
00154             m_bp_rank10         = cst.m_bp_rank10;
00155             m_bp_rank10.set_vector(&m_bp);
00156             m_bp_select10   = cst.m_bp_select10;
00157             m_bp_select10.set_vector(&m_bp);
00158         }
00159 
00160     public:
00161         const Csa& csa;                                                 
00162         const lcp_type& lcp;                                                    
00163         const bit_vector& bp;                                   
00164         const Bp_support& bp_support;                   
00165         const Rank_support10& bp_rank_10;               
00166         const Select_support10& bp_select_10;   
00167 
00169         cst_sada(): csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) {}
00171         ~cst_sada() {}
00173         cst_sada(const cst_sada& cst):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) {
00174             copy(cst);
00175         }
00176 
00177         template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1, uint8_t int_width_2, class size_type_class_2>
00178         cst_sada(const std::string& csa_file_name,
00179                  int_vector_file_buffer<int_width, size_type_class>& lcp_buf,
00180                  int_vector_file_buffer<int_width_1, size_type_class_1>& sa_buf,
00181                  int_vector_file_buffer<int_width_2, size_type_class_2>& isa_buf,
00182                  std::string dir="./",
00183                  bool build_ony_bps=false):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) {
00184             std::string id =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str();
00185             {
00186                 write_R_output("cst", "construct BPS", "begin", 1, 0);
00187                 cst_sct3<> temp_cst(csa_file_name, lcp_buf, sa_buf, isa_buf, dir, true);
00188                 m_bp.resize(4*lcp_buf.int_vector_size);
00189                 util::set_zero_bits(m_bp);
00190                 size_type idx=0;
00191                 for (cst_sct3<>::const_iterator it=temp_cst.begin(), end=temp_cst.end(); it!=end; ++it) {
00192                     if (1 == it.visit())
00193                         m_bp[idx] = 1;
00194                     if (temp_cst.is_leaf(*it))
00195                         ++idx;
00196                     ++idx;
00197                 }
00198                 m_bp.resize(idx);
00199                 write_R_output("cst", "construct BPS", "end", 1, 0);
00200             }
00201             util::load_from_file(m_csa, csa_file_name.c_str());
00202 
00203             write_R_output("cst", "construct CLCP", "begin", 1,0);
00204             construct_lcp(m_lcp, *this, lcp_buf, isa_buf);
00205             write_R_output("cst", "construct CLCP", "end", 1,0);
00206 
00207             write_R_output("cst", "construct BPSS", "begin", 1,0);
00208             m_bp_support = Bp_support(&m_bp);
00209             util::init_support(m_bp_rank10, &m_bp);
00210             util::init_support(m_bp_select10, &m_bp);
00211             write_R_output("cst", "construct BPSS", "end", 1,0);
00212         }
00213 
00214         cst_sada(tMSS& file_map, const std::string dir, const std::string id, bool build_only_bps=false):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) {
00215             construct(file_map, dir, id, build_only_bps);
00216         }
00217 
00218 
00219         void construct(tMSS& file_map, const std::string dir, const std::string id, bool build_only_bps=false) {
00220             {
00221                 write_R_output("cst", "construct BPS", "begin", 1, 0);
00222                 cst_sct3<> temp_cst(file_map, dir, id, true);
00223                 m_bp.resize(4*(temp_cst.bp.size()/2));
00224                 util::set_zero_bits(m_bp);
00225                 size_type idx=0;
00226                 for (cst_sct3<>::const_iterator it=temp_cst.begin(), end=temp_cst.end(); it!=end; ++it) {
00227 //                                      std::cout<<idx<<std::endl;
00228                     if (1 == it.visit())
00229                         m_bp[idx] = 1;
00230                     if (temp_cst.is_leaf(*it))
00231                         ++idx;
00232                     ++idx;
00233                 }
00234                 m_bp.resize(idx);
00235                 write_R_output("cst", "construct BPS", "end", 1, 0);
00236             }
00237             write_R_output("cst", "construct BPSS", "begin", 1,0);
00238             m_bp_support = Bp_support(&m_bp);
00239             util::init_support(m_bp_rank10,   &m_bp);
00240             util::init_support(m_bp_select10, &m_bp);
00241             write_R_output("cst", "construct BPSS", "end", 1,0);
00242 
00243             write_R_output("cst", "construct CLCP", "begin", 1,0);
00244             construct_lcp(m_lcp, *this, file_map, dir, id);
00245             write_R_output("cst", "construct CLCP", "end", 1,0);
00246 
00247 
00248 
00249             util::load_from_file(m_csa, file_map["csa"].c_str());
00250         }
00251 
00253 
00256         size_type size()const {
00257             return m_csa.size();
00258         }
00259 
00261 
00264         static size_type max_size() {
00265             return Csa::max_size();
00266         }
00267 
00269 
00272         bool empty()const {
00273             return m_csa.empty();
00274         }
00275 
00277 
00285         void swap(cst_sada& cst) {
00286             if (this != &cst) {
00287                 m_csa.swap(cst.m_csa);
00288                 m_bp.swap(cst.m_bp);
00289                 util::swap_support(m_bp_support, cst.m_bp_support, &m_bp, &(cst.m_bp));
00290                 util::swap_support(m_bp_rank10, cst.m_bp_rank10, &m_bp, &(cst.m_bp));
00291                 util::swap_support(m_bp_select10, cst.m_bp_select10, &m_bp, &(cst.m_bp));
00292                 // anything else has to be swapped before swapping lcp
00293                 swap_lcp(m_lcp, cst.m_lcp, *this, cst);
00294             }
00295         }
00296 
00298 
00301         const_iterator begin()const {
00302             if (0 == m_bp.size())  // special case: tree is unintialized
00303                 return end();
00304             else if (m_csa.size() == 1) { // special case: the root is a leaf
00305                 return const_iterator(this, root(), true, true);
00306             }
00307             return const_iterator(this, root(), false, true);
00308         }
00309 
00311 
00314         const_iterator end()const {
00315             return const_iterator(this, root(), true, false);
00316         }
00317 
00319         const_bottom_up_iterator begin_bottom_up()const {
00320             if (0 == m_bp.size())  // special case: tree is uninitialized
00321                 return end_bottom_up();
00322             return const_bottom_up_iterator(this, leftmost_leaf_in_the_subtree(root()));
00323         }
00324 
00326         const_bottom_up_iterator end_bottom_up()const {
00327             return const_bottom_up_iterator(this, root(), false);
00328         }
00329 
00331 
00334         cst_sada& operator=(const cst_sada& cst) {
00335             if (this != &cst) {
00336                 copy(cst);
00337             }
00338             return *this;
00339         }
00340 
00342 
00347         bool operator==(const cst_sada& csa)const;
00348 
00350 
00355         bool operator!=(const cst_sada& csa)const;
00356 
00358 
00361         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00362             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00363             size_type written_bytes = 0;
00364             written_bytes += m_csa.serialize(out, child, "csa");
00365             written_bytes += m_lcp.serialize(out, child, "lcp");
00366             written_bytes += m_bp.serialize(out, child, "bp");
00367             written_bytes += m_bp_support.serialize(out, child, "bp_support");
00368             written_bytes += m_bp_rank10.serialize(out, child, "bp_rank_10");
00369             written_bytes += m_bp_select10.serialize(out, child, "bp_select_10");
00370             structure_tree::add_size(child, written_bytes);
00371             return written_bytes;
00372         }
00373 
00375 
00377         void load(std::istream& in) {
00378             m_csa.load(in);
00379             load_lcp(m_lcp, in, *this);
00380             m_bp.load(in);
00381             m_bp_support.load(in, &m_bp);
00382             m_bp_rank10.load(in, &m_bp);
00383             m_bp_select10.load(in, &m_bp);
00384         }
00385 
00387         /* @{ */
00388 
00390 
00394         node_type root() const {
00395             return 0;
00396         }
00397 
00399 
00405         bool is_leaf(node_type v)const {
00406             assert(m_bp[v]==1);  // assert that v is a valid node of the suffix tree
00407             // if there is a closing parenthesis at position v+1, the node is a leaf
00408             return !m_bp[v+1];
00409         }
00410 
00412 
00419         node_type ith_leaf(size_type i)const {
00420             assert(i > 0 and i <= m_csa.size());
00421             // -1 as select(i) returns the postion of the 0 of pattern 10
00422             return m_bp_select10.select(i)-1;
00423         }
00424 
00426 
00432         size_type depth(node_type v)const {
00433             if (v == root())  // if v is the root
00434                 return 0;
00435 
00436             if (is_leaf(v)) { // if v is a leave
00437                 size_type i = m_bp_rank10(v); // get the index in the suffix array
00438                 return m_csa.size() - m_csa[i];
00439             }
00440             assert(inorder(v)>0);
00441             return m_lcp[inorder(v)]; // da ist es gut wenn lcp[0]=0. inorder-0 darf aber nicht auftretten, weil die wurzel bei leerem Baum entweder ein Blatt ist oder immer mindestens zwei Kinder hat...
00442         }
00443 
00445 
00451         size_type node_depth(node_type v)const {
00452             // -2 as the root() we assign depth=0 to the root
00453             return (m_bp_support.rank(v)<<1)-v-2;
00454         }
00455 
00457 
00464         size_type leaves_in_the_subtree(node_type v)const {
00465             size_type r = m_bp_support.find_close(v);
00466             return m_bp_rank10(r+1) - m_bp_rank10(v);
00467         }
00468 
00470 
00475         // 2010-12-08: Fixed method.
00476         node_type leftmost_leaf_in_the_subtree(const node_type& v)const {
00477             return m_bp_select10(m_bp_rank10(v)+1)-1;
00478         }
00479 
00481 
00486         // 2010-12-08: Fixed method.
00487         node_type rightmost_leaf_in_the_subtree(const node_type& v)const {
00488             size_type r = m_bp_support.find_close(v);
00489             return m_bp_select10(m_bp_rank10(r+1))-1;
00490         }
00491 
00493 
00500         size_type lb(const node_type& v)const {
00501             return m_bp_rank10(v);
00502         }
00503 
00505 
00512         size_type rb(const node_type& v)const {
00513             size_type r = m_bp_support.find_close(v);
00514             return m_bp_rank10(r+1)-1;
00515         }
00516 
00518 
00523         node_type parent(node_type v) const {
00524             assert(m_bp[v]==1); // assert a valid node
00525             if (v == root())
00526                 return root();
00527             else {
00528                 return m_bp_support.enclose(v);
00529             }
00530         }
00531 
00533 
00539         node_type sibling(node_type v)const {
00540             if (v==root())
00541                 return root();
00542             node_type sib = m_bp_support.find_close(v)+1;
00543             if (m_bp[sib])
00544                 return sib;
00545             else
00546                 return root();
00547         }
00548 
00550         /*
00551          *      \param v A valid tree node of the cst.
00552          *  \param c First character of the edge label from v to the desired child.
00553          *  \param char_pos Reference which will hold the position (0-based) of the matching char c in the sorted text/suffix array.
00554          *  \return The child node w which edge label (v,w) starts with c or root() if it does not exist.
00555          *  \par Time complexity
00556          *       \f$ \Order( (\saaccess+\isaaccess) \cdot \sigma + \lcpaccess) \f$
00557          *   \par Note
00558          *              With range median mimimum queries (RMMQ) one can code this operation in \f$\log \sigma \f$ time
00559          */
00560         // TODO: const unsigned char durch char type ersetzen
00561         node_type child(node_type v, const unsigned char c, size_type& char_pos)const {
00562             if (is_leaf(v))  // if v is a leaf = (), v has no child
00563                 return root();
00564             // else v = ( (     ))
00565             uint16_t cc = m_csa.char2comp[c];
00566             if (cc==0 and c!=0) // TODO: aendere char2comp so ab, dass man diesen sonderfall nicht braucht
00567                 return root();
00568             size_type char_ex_max_pos = m_csa.C[cc+1], char_inc_min_pos = m_csa.C[cc];
00569 
00570             size_type d = depth(v);  // time complexity: \lcpaccess
00571             size_type res = v+1;
00572             while (true) {
00573                 if (is_leaf(res)) {
00574 //                                      char_pos = m_csa( m_csa[ m_bp_rank10(res) ]+d );// replaced by the following line
00575                     char_pos = get_char_pos(m_bp_rank10(res), d, m_csa);
00576                 } else {
00577 //                                      char_pos = m_csa( m_csa[ inorder(res) ]+d );// replaced by the following line
00578                     char_pos = get_char_pos(inorder(res), d, m_csa);
00579                 }
00580 //                              std::cerr<<"char_pos="<<char_pos<<std::endl;
00581                 if (char_pos >= char_ex_max_pos)  // if the current char is lex. greater than the searched char: exit
00582                     return root();
00583                 if (char_pos >= char_inc_min_pos)  // if the current char is lex. equal with the
00584                     return res;
00585                 res = m_bp_support.find_close(res)+1;
00586                 if (!m_bp[res]) // closing parenthesis: there exists no next child
00587                     return root();
00588             }
00589         }
00590 
00592         // \sa child(node_type v, const unsigned char c, size_type &char_pos)
00593         node_type child(node_type v, const unsigned char c) {
00594             size_type char_pos;
00595             return child(v, c, char_pos);
00596         }
00597 
00599 
00607         node_type ith_child(node_type v, size_type i)const {
00608             if (is_leaf(v))  // if v is a leave, v has no child
00609                 return root();
00610             size_type res = v+1;
00611             while (i > 1) {
00612                 res = m_bp_support.find_close(res)+1;
00613                 if (!m_bp[res]) {// closing parenthesis: there exists no next child
00614                     return root();
00615                 }
00616                 --i;
00617             }
00618             return res;
00619         }
00620 
00622 
00629         unsigned char edge(node_type v, size_type d)const {
00630             if (d < 1 or d > depth(v)) {
00631                 throw std::out_of_range("OUT_OF_RANGE_ERROR: "+util::demangle(typeid(this).name())+" cst_sada<>::edge(node_type v, size_type d). d == 0 or d > depth(v)!");
00632             }
00633 
00634             size_type i = 0;// index of the first suffix in the subtree of v
00635             if (is_leaf(v)) { // if v is a leave
00636                 i = m_bp_rank10(v); // get the index in the suffix array
00637             } else {
00638                 i = inorder(v);
00639             }
00640 //                      size_type order = m_csa(m_csa[i]+d-1); // replaced by the following line
00641             size_type order = get_char_pos(i, d-1, m_csa);
00642             uint16_t c_begin = 1, c_end = m_csa.sigma+1, mid;
00643             while (c_begin < c_end) {
00644                 mid = (c_begin+c_end)>>1;
00645                 if (m_csa.C[mid] <= order) {
00646                     c_begin = mid+1;
00647                 } else {
00648                     c_end = mid;
00649                 }
00650             }
00651             return m_csa.comp2char[c_begin-1];
00652         }
00653 
00655 
00662         node_type lca(node_type v, node_type w)const {
00663             assert(m_bp[v] == 1 and m_bp[w] == 1);
00664             if (v > w) {
00665                 std::swap(v,w);
00666             } else if (v==w) {
00667                 return v;
00668             }
00669             if (v == root())
00670                 return root();
00671             return m_bp_support.double_enclose(v, w);
00672         }
00673 
00675 
00681         node_type sl(node_type v)const {
00682             if (v == root())
00683                 return root();
00684             // get leftmost leaf in the tree rooted at v
00685             size_type left              = m_bp_rank10(v);
00686             if (is_leaf(v)) {
00687                 return ith_leaf(m_csa.psi[left]+1);
00688             }
00689             // get the rightmost leaf in the tree rooted at v
00690             size_type right             = m_bp_rank10(m_bp_support.find_close(v))-1;
00691             assert(left < right);
00692             node_type left_leaf = ith_leaf(m_csa.psi[left]+1);
00693             node_type right_leaf= ith_leaf(m_csa.psi[right]+1);
00694             return lca(left_leaf, right_leaf);
00695         }
00696 
00698         /*
00699          *  \param v A valid not of a cst_sada.
00700          *  \param c The character which should be prepended to the string of the current node.
00701          *      \return root() if the Weiner link of (v, c) does not exist, otherwise the Weiner link is returned.
00702          *  \par Time complexity
00703          *              \f$ \Order{ t_{rank\_bwt} + t_{lca}}\f$
00704          */
00705         node_type wl(node_type v, const unsigned char c) const {
00706             // get leftmost leaf in the tree rooted at v
00707             size_type left              = m_bp_rank10(v);
00708             // get the rightmost leaf in the tree rooted at v
00709             size_type right = is_leaf(v) ? left : m_bp_rank10(m_bp_support.find_close(v))-1;
00710 
00711             size_type c_left    = m_csa.rank_bwt(left, c);
00712             size_type c_right   = m_csa.rank_bwt(right+1, c);
00713 
00714             if (c_left == c_right)  // there exists no Weiner link
00715                 return root();
00716             if (c_left+1 == c_right)
00717                 return ith_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
00718             else {
00719                 size_type left  = m_csa.C[m_csa.char2comp[c]] + c_left;
00720                 size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1;
00721                 assert(left < right);
00722                 node_type left_leaf = ith_leaf(left+1);
00723                 node_type right_leaf= ith_leaf(right+1);
00724                 return lca(left_leaf, right_leaf);
00725             }
00726         }
00727 
00729 
00734         size_type sn(node_type v)const {
00735             assert(is_leaf(v));
00736             // count the leaves left to leaf v
00737             return m_csa[m_bp_rank10(v)];
00738         }
00739 
00741 
00748         size_type id(node_type v)const {
00749             // v+1 is < m_bp.size(), as v is the position of an open parenthesis
00750             if (m_bp[v+1]) {    // case (a) inner node
00751                 return size() + (m_bp_support.rank(v) - 1) - m_bp_rank10(v);
00752             } else {            // case (b) leaf
00753                 return m_bp_rank10(v);
00754             }
00755             // each node has a unique opening parenthesis -> results in an integer between [0..2n-1]
00756             //  return m_bp_support.rank(v)-1;
00757         }
00758 
00760 
00767         size_type inv_id(size_type id) {
00768             if (id < size()) {  // the corresponding node is a leaf
00769                 return ith_leaf(id+1);
00770             } else { // the corresponding node is a inner node
00771                 id = id + 1 - size();
00772                 // solved by binary search; TODO: can be done in constant time by using a select structure on the bitpattern 11
00773                 size_type lb = 0, rb = m_bp.size(); // lb inclusive, rb exclusive
00774                 // invariant: arg(lb) < id, arg(rb)>= id
00775                 while (rb-lb > 1) {
00776                     size_type mid = lb + (rb-lb)/2; // mid \in [0..m_bp.size()-1]
00777                     if (m_bp[mid] == 0 and m_bp[mid-1] == 1) {  // if we are ``half on a leaf''
00778                         ++mid; //we step one to the right to include it
00779                     }
00780                     // get the number of open inner nodes before position mid, i.e. arg(mid)
00781                     size_type mid_id = m_bp_support.rank(mid-1) - m_bp_rank10(mid);  // Note: mid-1 is valid of mid is of type ``size_type'' as us the parameter of rank
00782                     if (mid_id < id) {
00783                         lb = mid;
00784                     } else { // mid_id >= x
00785                         rb = mid;
00786                     }
00787                 }
00788                 return lb;
00789             }
00790         }
00791 
00793         /*
00794          *  \return The number of nodes of the suffix tree.
00795          *  \par Time complexity
00796          *              \f$ \Order{1} \f$
00797          */
00798         size_type nodes()const {
00799             return m_bp.size()>>1;
00800         }
00801 
00803         /* \param lb Left bound of the lcp-interval [lb..rb] (inclusive).
00804          * \param rb Right bound of the lcp-interval [lb..rb] (inclusive).
00805          * \param l  Depth of the lcp-interval.
00806          *\ return The node in the suffix tree corresponding lcp-interval [lb..rb]
00807          */
00808         node_type node(size_type lb, size_type rb, size_type l=0) const {
00809             return lca(ith_leaf(lb+1), ith_leaf(rb+1));
00810         }
00811 
00813 
00819         size_type degree(node_type v)const {
00820             size_type res = 0;
00821             v = v+1;
00822             while (m_bp[v]) { // found open parentheses
00823                 ++res;
00824                 v = m_bp_support.find_close(v)+1;
00825             }
00826             return res;
00827         }
00828 
00830 
00834         size_type tlcp_idx(size_type i) const {
00835             size_type ii = 0;
00836             if (i > 0) {
00837                 size_type ipos   = m_bp_select10(i) - 1;  // -1 as select returns the position of the zero
00838                 size_type ip1pos = m_bp_select10(i+1) - 1;// "  "    "        "    "     "     "   "   "
00839                 ii    = m_bp_support.double_enclose(ipos, ip1pos);
00840             }
00841             ii = m_bp_support.find_close(ii);
00842             // all right, as bp[ii] = 0
00843             return ii - m_bp_support.rank(ii) - m_bp_rank10(ii);
00844         }
00845 
00847         void print_info()const {
00848             std::cout << "# size of cst in bytes per character" << std::endl;
00849             size_type cst_size = util::get_size_in_bytes(*this);
00850             std::cout << ((double)cst_size)/csa.size() << " # = "<< ((double)cst_size)/(1<<20) <<" MB"<<std::endl;
00851             std::cout<< ((double)util::get_size_in_bytes(csa))/cst_size << " # ratio of csa size" << std::endl;
00852             std::cout<< ((double)util::get_size_in_bytes(lcp))/cst_size << " # ratio of lcp size" << std::endl;
00853             std::cout<< ((double)util::get_size_in_bytes(bp))/cst_size << " # ratio of bp size" << std::endl;
00854             std::cout<< ((double)util::get_size_in_bytes(bp_support))/cst_size << " # ratio of bp_support size" << std::endl;
00855             std::cout<< ((double)util::get_size_in_bytes(bp_rank_10))/cst_size << " # ratio of bp_rank_10 size" << std::endl;
00856             std::cout<< ((double)util::get_size_in_bytes(bp_select_10))/cst_size << " # ratio of bp_select_10 size" << std::endl;
00857         }
00858 
00859         /* @} */
00860 
00861 };
00862 
00863 // == template functions ==
00864 
00865 
00866 } // end namespace sdsl
00867 
00868 #endif