SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_CST_SADA 00022 #define INCLUDED_SDSL_CST_SADA 00023 00024 #include "int_vector.hpp" 00025 #include "algorithms.hpp" 00026 #include "iterators.hpp" 00027 #include "lcp_support_sada.hpp" 00028 #include "select_support_mcl.hpp" 00029 #include "bp_support.hpp" 00030 #include "bp_support_sada.hpp" 00031 #include "csa_sada.hpp" // for std initialization of cst_sada 00032 #include "cst_iterators.hpp" 00033 #include "cst_sct.hpp" // compressed suffix tree based on the Super-Cartesian tree for the construction 00034 #include "cst_sct3.hpp" // compressed suffix tree based on the Super-Cartesian tree for the construction 00035 #include "util.hpp" 00036 #include <iostream> 00037 #include <algorithm> 00038 #include <cassert> 00039 #include <cstring> // for strlen 00040 #include <iomanip> 00041 #include <iterator> 00042 00043 namespace sdsl 00044 { 00045 00047 00067 template<class Csa = csa_sada<>, 00068 class Lcp = lcp_support_sada<>, 00069 class Bp_support = bp_support_sada<>, 00070 class Rank_support10 = rank_support_v<10,2>, 00071 class Select_support10 = select_support_mcl<10,2> > 00072 class cst_sada 00073 { 00074 public: 00075 typedef typename Csa::value_type value_type; // STL Container requirement 00076 typedef cst_dfs_const_forward_iterator<cst_sada> const_iterator;// STL Container requirement 00077 // typedef const_iterator iterator; // STL Container requirement 00078 typedef cst_bottom_up_const_forward_iterator<cst_sada> const_bottom_up_iterator; 00079 // typedef const_bottom_up_iterator bottom_up_iterator; 00080 typedef const value_type const_reference; 00081 typedef const_reference reference; 00082 typedef const_reference* pointer; 00083 typedef const pointer const_pointer; 00084 typedef typename Csa::size_type size_type; // STL Container requirement 00085 typedef size_type cst_size_type; 00086 typedef ptrdiff_t difference_type; // STL Container requirement 00087 typedef Csa csa_type; 00088 typedef typename Lcp::template type<cst_sada>::lcp_type lcp_type; 00089 typedef typename Csa::pattern_type pattern_type;// TODO: replace by csa::pattern type!!!??? 00090 typedef typename Csa::char_type char_type; 00091 typedef size_type node_type; 00092 typedef Bp_support bp_support_type; 00093 00094 typedef cst_tag index_category; 00095 private: 00096 Csa m_csa; // suffix array 00097 lcp_type m_lcp; // lcp information 00098 bit_vector m_bp; // balanced parentheses sequence for suffix tree 00099 bp_support_type m_bp_support; // support for the balanced parentheses sequence 00100 Rank_support10 m_bp_rank10; // rank_support for leaves, i.e. "10" bit pattern 00101 Select_support10 m_bp_select10;// select_support for leaves, i.e. "10" bit pattern 00102 00103 /* Get the number of leaves that are in the subtree rooted at the first child of v + 00104 * number of leafs in the subtrees rooted at the children of parent(v) which precede v in the tree. 00105 */ 00106 size_type inorder(node_type v)const { 00107 return m_bp_rank10(m_bp_support.find_close(v+1)+1); 00108 } 00109 00110 bool generate_bp(const cst_sct<Csa, Lcp>& temp_cst, const unsigned char* str) { 00111 typedef typename cst_sct<Csa, Lcp>::node_type cst_sct_node_type; 00112 m_bp.resize(4*temp_cst.csa.size()); 00113 util::set_zero_bits(m_bp); 00114 if (temp_cst.csa.size() == 0) 00115 return false; 00116 size_type cnt = 0; 00117 std::stack<cst_sct_node_type> right_most_path; 00118 std::stack<bool> used; 00119 cst_sct_node_type w = temp_cst.root(); 00120 used.push(false); 00121 right_most_path.push(w); 00122 while (!right_most_path.empty()) { 00123 if (used.top()) { 00124 cnt++; // bits are already set to 0: m_bp[cnt++] = 0; 00125 cst_sct_node_type v = right_most_path.top(); 00126 used.pop(); right_most_path.pop(); 00127 cst_sct_node_type sibling = temp_cst.sibling(v); 00128 if (sibling != w) { 00129 right_most_path.push(sibling); 00130 used.push(false); 00131 } 00132 } else { 00133 m_bp[cnt++] = 1; 00134 used.top() = true; 00135 cst_sct_node_type child = temp_cst.ith_child(right_most_path.top(), 1); 00136 if (child != w) { 00137 used.push(false); 00138 right_most_path.push(child); 00139 } 00140 } 00141 } 00142 m_bp.resize(cnt); 00143 m_csa = Csa(temp_cst.csa, str); 00144 copy_lcp(m_lcp, temp_cst.m_lcp, *this); 00145 return true; 00146 } 00147 00148 void copy(const cst_sada& cst) { 00149 m_csa = cst.m_csa; 00150 copy_lcp(m_lcp, cst.m_lcp, *this); 00151 m_bp = cst.m_bp; 00152 m_bp_support = cst.m_bp_support; 00153 m_bp_support.set_vector(&m_bp); 00154 m_bp_rank10 = cst.m_bp_rank10; 00155 m_bp_rank10.set_vector(&m_bp); 00156 m_bp_select10 = cst.m_bp_select10; 00157 m_bp_select10.set_vector(&m_bp); 00158 } 00159 00160 public: 00161 const Csa& csa; 00162 const lcp_type& lcp; 00163 const bit_vector& bp; 00164 const Bp_support& bp_support; 00165 const Rank_support10& bp_rank_10; 00166 const Select_support10& bp_select_10; 00167 00169 cst_sada(): csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) {} 00171 ~cst_sada() {} 00173 cst_sada(const cst_sada& cst):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) { 00174 copy(cst); 00175 } 00176 00177 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1, uint8_t int_width_2, class size_type_class_2> 00178 cst_sada(const std::string& csa_file_name, 00179 int_vector_file_buffer<int_width, size_type_class>& lcp_buf, 00180 int_vector_file_buffer<int_width_1, size_type_class_1>& sa_buf, 00181 int_vector_file_buffer<int_width_2, size_type_class_2>& isa_buf, 00182 std::string dir="./", 00183 bool build_ony_bps=false):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) { 00184 std::string id = util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str(); 00185 { 00186 write_R_output("cst", "construct BPS", "begin", 1, 0); 00187 cst_sct3<> temp_cst(csa_file_name, lcp_buf, sa_buf, isa_buf, dir, true); 00188 m_bp.resize(4*lcp_buf.int_vector_size); 00189 util::set_zero_bits(m_bp); 00190 size_type idx=0; 00191 for (cst_sct3<>::const_iterator it=temp_cst.begin(), end=temp_cst.end(); it!=end; ++it) { 00192 if (1 == it.visit()) 00193 m_bp[idx] = 1; 00194 if (temp_cst.is_leaf(*it)) 00195 ++idx; 00196 ++idx; 00197 } 00198 m_bp.resize(idx); 00199 write_R_output("cst", "construct BPS", "end", 1, 0); 00200 } 00201 util::load_from_file(m_csa, csa_file_name.c_str()); 00202 00203 write_R_output("cst", "construct CLCP", "begin", 1,0); 00204 construct_lcp(m_lcp, *this, lcp_buf, isa_buf); 00205 write_R_output("cst", "construct CLCP", "end", 1,0); 00206 00207 write_R_output("cst", "construct BPSS", "begin", 1,0); 00208 m_bp_support = Bp_support(&m_bp); 00209 util::init_support(m_bp_rank10, &m_bp); 00210 util::init_support(m_bp_select10, &m_bp); 00211 write_R_output("cst", "construct BPSS", "end", 1,0); 00212 } 00213 00214 cst_sada(tMSS& file_map, const std::string dir, const std::string id, bool build_only_bps=false):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), bp_rank_10(m_bp_rank10), bp_select_10(m_bp_select10) { 00215 construct(file_map, dir, id, build_only_bps); 00216 } 00217 00218 00219 void construct(tMSS& file_map, const std::string dir, const std::string id, bool build_only_bps=false) { 00220 { 00221 write_R_output("cst", "construct BPS", "begin", 1, 0); 00222 cst_sct3<> temp_cst(file_map, dir, id, true); 00223 m_bp.resize(4*(temp_cst.bp.size()/2)); 00224 util::set_zero_bits(m_bp); 00225 size_type idx=0; 00226 for (cst_sct3<>::const_iterator it=temp_cst.begin(), end=temp_cst.end(); it!=end; ++it) { 00227 // std::cout<<idx<<std::endl; 00228 if (1 == it.visit()) 00229 m_bp[idx] = 1; 00230 if (temp_cst.is_leaf(*it)) 00231 ++idx; 00232 ++idx; 00233 } 00234 m_bp.resize(idx); 00235 write_R_output("cst", "construct BPS", "end", 1, 0); 00236 } 00237 write_R_output("cst", "construct BPSS", "begin", 1,0); 00238 m_bp_support = Bp_support(&m_bp); 00239 util::init_support(m_bp_rank10, &m_bp); 00240 util::init_support(m_bp_select10, &m_bp); 00241 write_R_output("cst", "construct BPSS", "end", 1,0); 00242 00243 write_R_output("cst", "construct CLCP", "begin", 1,0); 00244 construct_lcp(m_lcp, *this, file_map, dir, id); 00245 write_R_output("cst", "construct CLCP", "end", 1,0); 00246 00247 00248 00249 util::load_from_file(m_csa, file_map["csa"].c_str()); 00250 } 00251 00253 00256 size_type size()const { 00257 return m_csa.size(); 00258 } 00259 00261 00264 static size_type max_size() { 00265 return Csa::max_size(); 00266 } 00267 00269 00272 bool empty()const { 00273 return m_csa.empty(); 00274 } 00275 00277 00285 void swap(cst_sada& cst) { 00286 if (this != &cst) { 00287 m_csa.swap(cst.m_csa); 00288 m_bp.swap(cst.m_bp); 00289 util::swap_support(m_bp_support, cst.m_bp_support, &m_bp, &(cst.m_bp)); 00290 util::swap_support(m_bp_rank10, cst.m_bp_rank10, &m_bp, &(cst.m_bp)); 00291 util::swap_support(m_bp_select10, cst.m_bp_select10, &m_bp, &(cst.m_bp)); 00292 // anything else has to be swapped before swapping lcp 00293 swap_lcp(m_lcp, cst.m_lcp, *this, cst); 00294 } 00295 } 00296 00298 00301 const_iterator begin()const { 00302 if (0 == m_bp.size()) // special case: tree is unintialized 00303 return end(); 00304 else if (m_csa.size() == 1) { // special case: the root is a leaf 00305 return const_iterator(this, root(), true, true); 00306 } 00307 return const_iterator(this, root(), false, true); 00308 } 00309 00311 00314 const_iterator end()const { 00315 return const_iterator(this, root(), true, false); 00316 } 00317 00319 const_bottom_up_iterator begin_bottom_up()const { 00320 if (0 == m_bp.size()) // special case: tree is uninitialized 00321 return end_bottom_up(); 00322 return const_bottom_up_iterator(this, leftmost_leaf_in_the_subtree(root())); 00323 } 00324 00326 const_bottom_up_iterator end_bottom_up()const { 00327 return const_bottom_up_iterator(this, root(), false); 00328 } 00329 00331 00334 cst_sada& operator=(const cst_sada& cst) { 00335 if (this != &cst) { 00336 copy(cst); 00337 } 00338 return *this; 00339 } 00340 00342 00347 bool operator==(const cst_sada& csa)const; 00348 00350 00355 bool operator!=(const cst_sada& csa)const; 00356 00358 00361 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00362 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00363 size_type written_bytes = 0; 00364 written_bytes += m_csa.serialize(out, child, "csa"); 00365 written_bytes += m_lcp.serialize(out, child, "lcp"); 00366 written_bytes += m_bp.serialize(out, child, "bp"); 00367 written_bytes += m_bp_support.serialize(out, child, "bp_support"); 00368 written_bytes += m_bp_rank10.serialize(out, child, "bp_rank_10"); 00369 written_bytes += m_bp_select10.serialize(out, child, "bp_select_10"); 00370 structure_tree::add_size(child, written_bytes); 00371 return written_bytes; 00372 } 00373 00375 00377 void load(std::istream& in) { 00378 m_csa.load(in); 00379 load_lcp(m_lcp, in, *this); 00380 m_bp.load(in); 00381 m_bp_support.load(in, &m_bp); 00382 m_bp_rank10.load(in, &m_bp); 00383 m_bp_select10.load(in, &m_bp); 00384 } 00385 00387 /* @{ */ 00388 00390 00394 node_type root() const { 00395 return 0; 00396 } 00397 00399 00405 bool is_leaf(node_type v)const { 00406 assert(m_bp[v]==1); // assert that v is a valid node of the suffix tree 00407 // if there is a closing parenthesis at position v+1, the node is a leaf 00408 return !m_bp[v+1]; 00409 } 00410 00412 00419 node_type ith_leaf(size_type i)const { 00420 assert(i > 0 and i <= m_csa.size()); 00421 // -1 as select(i) returns the postion of the 0 of pattern 10 00422 return m_bp_select10.select(i)-1; 00423 } 00424 00426 00432 size_type depth(node_type v)const { 00433 if (v == root()) // if v is the root 00434 return 0; 00435 00436 if (is_leaf(v)) { // if v is a leave 00437 size_type i = m_bp_rank10(v); // get the index in the suffix array 00438 return m_csa.size() - m_csa[i]; 00439 } 00440 assert(inorder(v)>0); 00441 return m_lcp[inorder(v)]; // da ist es gut wenn lcp[0]=0. inorder-0 darf aber nicht auftretten, weil die wurzel bei leerem Baum entweder ein Blatt ist oder immer mindestens zwei Kinder hat... 00442 } 00443 00445 00451 size_type node_depth(node_type v)const { 00452 // -2 as the root() we assign depth=0 to the root 00453 return (m_bp_support.rank(v)<<1)-v-2; 00454 } 00455 00457 00464 size_type leaves_in_the_subtree(node_type v)const { 00465 size_type r = m_bp_support.find_close(v); 00466 return m_bp_rank10(r+1) - m_bp_rank10(v); 00467 } 00468 00470 00475 // 2010-12-08: Fixed method. 00476 node_type leftmost_leaf_in_the_subtree(const node_type& v)const { 00477 return m_bp_select10(m_bp_rank10(v)+1)-1; 00478 } 00479 00481 00486 // 2010-12-08: Fixed method. 00487 node_type rightmost_leaf_in_the_subtree(const node_type& v)const { 00488 size_type r = m_bp_support.find_close(v); 00489 return m_bp_select10(m_bp_rank10(r+1))-1; 00490 } 00491 00493 00500 size_type lb(const node_type& v)const { 00501 return m_bp_rank10(v); 00502 } 00503 00505 00512 size_type rb(const node_type& v)const { 00513 size_type r = m_bp_support.find_close(v); 00514 return m_bp_rank10(r+1)-1; 00515 } 00516 00518 00523 node_type parent(node_type v) const { 00524 assert(m_bp[v]==1); // assert a valid node 00525 if (v == root()) 00526 return root(); 00527 else { 00528 return m_bp_support.enclose(v); 00529 } 00530 } 00531 00533 00539 node_type sibling(node_type v)const { 00540 if (v==root()) 00541 return root(); 00542 node_type sib = m_bp_support.find_close(v)+1; 00543 if (m_bp[sib]) 00544 return sib; 00545 else 00546 return root(); 00547 } 00548 00550 /* 00551 * \param v A valid tree node of the cst. 00552 * \param c First character of the edge label from v to the desired child. 00553 * \param char_pos Reference which will hold the position (0-based) of the matching char c in the sorted text/suffix array. 00554 * \return The child node w which edge label (v,w) starts with c or root() if it does not exist. 00555 * \par Time complexity 00556 * \f$ \Order( (\saaccess+\isaaccess) \cdot \sigma + \lcpaccess) \f$ 00557 * \par Note 00558 * With range median mimimum queries (RMMQ) one can code this operation in \f$\log \sigma \f$ time 00559 */ 00560 // TODO: const unsigned char durch char type ersetzen 00561 node_type child(node_type v, const unsigned char c, size_type& char_pos)const { 00562 if (is_leaf(v)) // if v is a leaf = (), v has no child 00563 return root(); 00564 // else v = ( ( )) 00565 uint16_t cc = m_csa.char2comp[c]; 00566 if (cc==0 and c!=0) // TODO: aendere char2comp so ab, dass man diesen sonderfall nicht braucht 00567 return root(); 00568 size_type char_ex_max_pos = m_csa.C[cc+1], char_inc_min_pos = m_csa.C[cc]; 00569 00570 size_type d = depth(v); // time complexity: \lcpaccess 00571 size_type res = v+1; 00572 while (true) { 00573 if (is_leaf(res)) { 00574 // char_pos = m_csa( m_csa[ m_bp_rank10(res) ]+d );// replaced by the following line 00575 char_pos = get_char_pos(m_bp_rank10(res), d, m_csa); 00576 } else { 00577 // char_pos = m_csa( m_csa[ inorder(res) ]+d );// replaced by the following line 00578 char_pos = get_char_pos(inorder(res), d, m_csa); 00579 } 00580 // std::cerr<<"char_pos="<<char_pos<<std::endl; 00581 if (char_pos >= char_ex_max_pos) // if the current char is lex. greater than the searched char: exit 00582 return root(); 00583 if (char_pos >= char_inc_min_pos) // if the current char is lex. equal with the 00584 return res; 00585 res = m_bp_support.find_close(res)+1; 00586 if (!m_bp[res]) // closing parenthesis: there exists no next child 00587 return root(); 00588 } 00589 } 00590 00592 // \sa child(node_type v, const unsigned char c, size_type &char_pos) 00593 node_type child(node_type v, const unsigned char c) { 00594 size_type char_pos; 00595 return child(v, c, char_pos); 00596 } 00597 00599 00607 node_type ith_child(node_type v, size_type i)const { 00608 if (is_leaf(v)) // if v is a leave, v has no child 00609 return root(); 00610 size_type res = v+1; 00611 while (i > 1) { 00612 res = m_bp_support.find_close(res)+1; 00613 if (!m_bp[res]) {// closing parenthesis: there exists no next child 00614 return root(); 00615 } 00616 --i; 00617 } 00618 return res; 00619 } 00620 00622 00629 unsigned char edge(node_type v, size_type d)const { 00630 if (d < 1 or d > depth(v)) { 00631 throw std::out_of_range("OUT_OF_RANGE_ERROR: "+util::demangle(typeid(this).name())+" cst_sada<>::edge(node_type v, size_type d). d == 0 or d > depth(v)!"); 00632 } 00633 00634 size_type i = 0;// index of the first suffix in the subtree of v 00635 if (is_leaf(v)) { // if v is a leave 00636 i = m_bp_rank10(v); // get the index in the suffix array 00637 } else { 00638 i = inorder(v); 00639 } 00640 // size_type order = m_csa(m_csa[i]+d-1); // replaced by the following line 00641 size_type order = get_char_pos(i, d-1, m_csa); 00642 uint16_t c_begin = 1, c_end = m_csa.sigma+1, mid; 00643 while (c_begin < c_end) { 00644 mid = (c_begin+c_end)>>1; 00645 if (m_csa.C[mid] <= order) { 00646 c_begin = mid+1; 00647 } else { 00648 c_end = mid; 00649 } 00650 } 00651 return m_csa.comp2char[c_begin-1]; 00652 } 00653 00655 00662 node_type lca(node_type v, node_type w)const { 00663 assert(m_bp[v] == 1 and m_bp[w] == 1); 00664 if (v > w) { 00665 std::swap(v,w); 00666 } else if (v==w) { 00667 return v; 00668 } 00669 if (v == root()) 00670 return root(); 00671 return m_bp_support.double_enclose(v, w); 00672 } 00673 00675 00681 node_type sl(node_type v)const { 00682 if (v == root()) 00683 return root(); 00684 // get leftmost leaf in the tree rooted at v 00685 size_type left = m_bp_rank10(v); 00686 if (is_leaf(v)) { 00687 return ith_leaf(m_csa.psi[left]+1); 00688 } 00689 // get the rightmost leaf in the tree rooted at v 00690 size_type right = m_bp_rank10(m_bp_support.find_close(v))-1; 00691 assert(left < right); 00692 node_type left_leaf = ith_leaf(m_csa.psi[left]+1); 00693 node_type right_leaf= ith_leaf(m_csa.psi[right]+1); 00694 return lca(left_leaf, right_leaf); 00695 } 00696 00698 /* 00699 * \param v A valid not of a cst_sada. 00700 * \param c The character which should be prepended to the string of the current node. 00701 * \return root() if the Weiner link of (v, c) does not exist, otherwise the Weiner link is returned. 00702 * \par Time complexity 00703 * \f$ \Order{ t_{rank\_bwt} + t_{lca}}\f$ 00704 */ 00705 node_type wl(node_type v, const unsigned char c) const { 00706 // get leftmost leaf in the tree rooted at v 00707 size_type left = m_bp_rank10(v); 00708 // get the rightmost leaf in the tree rooted at v 00709 size_type right = is_leaf(v) ? left : m_bp_rank10(m_bp_support.find_close(v))-1; 00710 00711 size_type c_left = m_csa.rank_bwt(left, c); 00712 size_type c_right = m_csa.rank_bwt(right+1, c); 00713 00714 if (c_left == c_right) // there exists no Weiner link 00715 return root(); 00716 if (c_left+1 == c_right) 00717 return ith_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1); 00718 else { 00719 size_type left = m_csa.C[m_csa.char2comp[c]] + c_left; 00720 size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1; 00721 assert(left < right); 00722 node_type left_leaf = ith_leaf(left+1); 00723 node_type right_leaf= ith_leaf(right+1); 00724 return lca(left_leaf, right_leaf); 00725 } 00726 } 00727 00729 00734 size_type sn(node_type v)const { 00735 assert(is_leaf(v)); 00736 // count the leaves left to leaf v 00737 return m_csa[m_bp_rank10(v)]; 00738 } 00739 00741 00748 size_type id(node_type v)const { 00749 // v+1 is < m_bp.size(), as v is the position of an open parenthesis 00750 if (m_bp[v+1]) { // case (a) inner node 00751 return size() + (m_bp_support.rank(v) - 1) - m_bp_rank10(v); 00752 } else { // case (b) leaf 00753 return m_bp_rank10(v); 00754 } 00755 // each node has a unique opening parenthesis -> results in an integer between [0..2n-1] 00756 // return m_bp_support.rank(v)-1; 00757 } 00758 00760 00767 size_type inv_id(size_type id) { 00768 if (id < size()) { // the corresponding node is a leaf 00769 return ith_leaf(id+1); 00770 } else { // the corresponding node is a inner node 00771 id = id + 1 - size(); 00772 // solved by binary search; TODO: can be done in constant time by using a select structure on the bitpattern 11 00773 size_type lb = 0, rb = m_bp.size(); // lb inclusive, rb exclusive 00774 // invariant: arg(lb) < id, arg(rb)>= id 00775 while (rb-lb > 1) { 00776 size_type mid = lb + (rb-lb)/2; // mid \in [0..m_bp.size()-1] 00777 if (m_bp[mid] == 0 and m_bp[mid-1] == 1) { // if we are ``half on a leaf'' 00778 ++mid; //we step one to the right to include it 00779 } 00780 // get the number of open inner nodes before position mid, i.e. arg(mid) 00781 size_type mid_id = m_bp_support.rank(mid-1) - m_bp_rank10(mid); // Note: mid-1 is valid of mid is of type ``size_type'' as us the parameter of rank 00782 if (mid_id < id) { 00783 lb = mid; 00784 } else { // mid_id >= x 00785 rb = mid; 00786 } 00787 } 00788 return lb; 00789 } 00790 } 00791 00793 /* 00794 * \return The number of nodes of the suffix tree. 00795 * \par Time complexity 00796 * \f$ \Order{1} \f$ 00797 */ 00798 size_type nodes()const { 00799 return m_bp.size()>>1; 00800 } 00801 00803 /* \param lb Left bound of the lcp-interval [lb..rb] (inclusive). 00804 * \param rb Right bound of the lcp-interval [lb..rb] (inclusive). 00805 * \param l Depth of the lcp-interval. 00806 *\ return The node in the suffix tree corresponding lcp-interval [lb..rb] 00807 */ 00808 node_type node(size_type lb, size_type rb, size_type l=0) const { 00809 return lca(ith_leaf(lb+1), ith_leaf(rb+1)); 00810 } 00811 00813 00819 size_type degree(node_type v)const { 00820 size_type res = 0; 00821 v = v+1; 00822 while (m_bp[v]) { // found open parentheses 00823 ++res; 00824 v = m_bp_support.find_close(v)+1; 00825 } 00826 return res; 00827 } 00828 00830 00834 size_type tlcp_idx(size_type i) const { 00835 size_type ii = 0; 00836 if (i > 0) { 00837 size_type ipos = m_bp_select10(i) - 1; // -1 as select returns the position of the zero 00838 size_type ip1pos = m_bp_select10(i+1) - 1;// " " " " " " " " " 00839 ii = m_bp_support.double_enclose(ipos, ip1pos); 00840 } 00841 ii = m_bp_support.find_close(ii); 00842 // all right, as bp[ii] = 0 00843 return ii - m_bp_support.rank(ii) - m_bp_rank10(ii); 00844 } 00845 00847 void print_info()const { 00848 std::cout << "# size of cst in bytes per character" << std::endl; 00849 size_type cst_size = util::get_size_in_bytes(*this); 00850 std::cout << ((double)cst_size)/csa.size() << " # = "<< ((double)cst_size)/(1<<20) <<" MB"<<std::endl; 00851 std::cout<< ((double)util::get_size_in_bytes(csa))/cst_size << " # ratio of csa size" << std::endl; 00852 std::cout<< ((double)util::get_size_in_bytes(lcp))/cst_size << " # ratio of lcp size" << std::endl; 00853 std::cout<< ((double)util::get_size_in_bytes(bp))/cst_size << " # ratio of bp size" << std::endl; 00854 std::cout<< ((double)util::get_size_in_bytes(bp_support))/cst_size << " # ratio of bp_support size" << std::endl; 00855 std::cout<< ((double)util::get_size_in_bytes(bp_rank_10))/cst_size << " # ratio of bp_rank_10 size" << std::endl; 00856 std::cout<< ((double)util::get_size_in_bytes(bp_select_10))/cst_size << " # ratio of bp_select_10 size" << std::endl; 00857 } 00858 00859 /* @} */ 00860 00861 }; 00862 00863 // == template functions == 00864 00865 00866 } // end namespace sdsl 00867 00868 #endif