SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/cst3_sct3p.hpp
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CST_SCT3P
00022 #define INCLUDED_SDSL_CST_SCT3P
00023 
00024 #include "int_vector.hpp"
00025 #include "algorithms.hpp"
00026 #include "iterators.hpp"
00027 #include "lcp_support_tree.hpp"
00028 #include "lcp_wt.hpp"
00029 #include "bp_support.hpp"
00030 #include "csa_wt.hpp" // for std initialization of cst_sct3p 
00031 #include "csa_uncompressed.hpp"
00032 #include "cst_iterators.hpp"
00033 #include "cst_sct.hpp" // for lcp_interval
00034 #include "rank_support.hpp"
00035 #include "testutils.hpp"
00036 #include "util.hpp"
00037 #include <iostream>
00038 #include <algorithm>
00039 #include <cassert>
00040 #include <cstring> // for strlen
00041 #include <iomanip>
00042 #include <iterator>
00043 #include <stack> // for the calculation of the balanced parantheses sequence
00044 #include <ostream>
00045 
00046 namespace sdsl
00047 {
00048 
00049 struct cst_tag; // forward declaration
00050 
00051 template<class Int = int_vector<>::size_type /*size_t*/>
00052 struct bp_interval_p
00053 {
00054     Int ipos;  // position of the i+1th opening parenthesis in the balanced parenthese sequence
00055     Int cipos; // positon of the matching closing parenthesis of the i+1th openeing parenthesis in the balanced parentheses sequence
00056     Int jp1pos; // position of the j+2th opening parentheis in the balanced parentheses sequence
00057 
00059 
00061     bp_interval_p(Int ipos=0, Int cipos=0, Int jp1pos=0):ipos(ipos),cipos(cipos),jp1pos(jp1pos) {};
00062 
00063 
00064     bool operator<(const bp_interval_p& interval)const {
00065         if (ipos!=interval.ipos)
00066             return ipos<interval.ipos;
00067         return jp1pos<interval.jp1pos;
00068     }
00069 
00071 
00073     bool operator==(const bp_interval_p& interval)const {
00074         return ipos==interval.ipos and
00075                jp1pos==interval.jp1pos;
00076     }
00077 
00079 
00081     bool operator!=(const bp_interval_p& interval)const {
00082         return !(*this==interval);
00083     }
00084 
00086 
00088     bp_interval_p& operator=(const bp_interval_p& interval) {
00089         ipos = interval.ipos;
00090         cipos = interval.cipos;
00091         jp1pos = interval.jp1pos;
00092         return *this;
00093     }
00094 };
00095 
00096 
00097 template<class Int>
00098 inline std::ostream& operator<<(std::ostream& os, const bp_interval_p<Int>& interval)
00099 {
00100     os<<"-("<<interval.ipos<<","<<interval.cipos<<","<<interval.jp1pos<<")";
00101     return os;
00102 }
00103 
00104 
00106 
00126 template<class Csa = csa_wt<>, class Lcp = lcp_support_tree<lcp_wt<> >, class Bp_support = bp_support_sada<>, class Rank_support = rank_support_v5<> >
00127 class cst_sct3p
00128 {
00129     public:
00130         typedef typename Csa::value_type                                                         value_type;    // STL Container requirement/TODO: ist das nicht gleich node type???
00131         typedef cst_dfs_const_forward_iterator<cst_sct3p>                        const_iterator;// STL Container requirement
00132 //      typedef const_iterator                                                                           iterator;              // STL Container requirement
00133         typedef cst_bottom_up_const_forward_iterator<cst_sct3p>          const_bottom_up_iterator;
00134 //      typedef  const_bottom_up_iterator                                                        bottom_up_iterator;
00135         typedef const value_type                                                                         const_reference;
00136         typedef const_reference                                                                          reference;
00137         typedef const_reference*                                                                         pointer;
00138         typedef const pointer                                                                            const_pointer;
00139         typedef typename Csa::size_type                                                          size_type;             // STL Container requirement
00140         typedef size_type                                                                                        cst_size_type;
00141         typedef ptrdiff_t                                                                                        difference_type; // STL Container requirement
00142         typedef Csa                                                                                                      csa_type;
00143         typedef typename Lcp::template type<cst_sct3p>::lcp_type                 lcp_type;
00144         typedef Bp_support                                                                                       bp_support_type;
00145         typedef typename Csa::pattern_type                                                       pattern_type;
00146         typedef typename Csa::char_type                                                          char_type;
00147         typedef bp_interval_p<size_type>                                                         node_type; 
00148         typedef Rank_support                                                                             fc_rank_support_type;
00149 
00150         typedef cst_tag                                                                                          index_category;
00151     private:
00152         Csa                                     m_csa;
00153         lcp_type                                m_lcp;
00154         bit_vector                              m_bp;
00155         bp_support_type                 m_bp_support;
00156         bit_vector                              m_first_child; // implementation note: no rank structure is needed for the first_child bit_vector, except for id()
00157         fc_rank_support_type    m_first_child_rank;
00158         uint8_t                                 m_sigma;
00159         size_type                               m_nodes;
00160 
00161         void copy(const cst_sct3p& cst) {
00162             m_csa                       = cst.m_csa;
00163             copy_lcp(m_lcp, cst.m_lcp, *this);
00164             m_bp                        = cst.m_bp;
00165             m_bp_support        = cst.m_bp_support;
00166             m_bp_support.set_vector(&m_bp);
00167             m_first_child       = cst.m_first_child;
00168             m_first_child_rank = cst.m_first_child_rank;
00169             m_first_child_rank.set_vector(&m_first_child);
00170             m_sigma                     = cst.m_sigma;
00171             m_nodes                     = cst.m_nodes;
00172         }
00173 
00174         // Get the first l index of a [i,j] interval.
00175         /* I.e. given an interval [i,j], the function returns the position of the smallest entry lcp[k] with \f$ i<k\leq j \f$
00176          * \par Time complexity
00177          *       \f$ \Order{1} \f$
00178          */
00179         inline size_type get_first_l_index(const node_type& node, size_type& kpos, size_type& ckpos)const {
00180             if (node.cipos > node.jp1pos) { // corresponds to m_lcp[i] <= m_lcp[j+1]
00181                 ckpos   = node.jp1pos-1;
00182             } else { // corresponds to m_lcp[i] > m_lcp[j+1]
00183                 ckpos   = node.cipos-1;
00184             }
00185             assert(m_bp[ckpos]==0);
00186             kpos        = m_bp_support.find_open(ckpos);
00187             return m_bp_support.rank(kpos)-1;
00188         }
00189 
00190         // Get the ith l index of a node
00191         // if there exists no ith l-index return node.j+1
00192         size_type get_ith_l_index(const node_type& node, size_type i, size_type& kpos, size_type& ckpos)const {
00193             uint64_t children = 0;
00194 
00195             assert(i > 0);
00196             if (node.cipos > node.jp1pos) { // corresponds to m_lcp[i] <= m_lcp[j+1]
00197                 ckpos   = node.jp1pos-1;
00198             } else { // corresponds to m_lcp[i] > m_lcp[j+1]
00199                 ckpos   = node.cipos-1;
00200             }
00201             assert(m_bp[ckpos] == 0);   // at least the first l-index should be present, i.e. node is not leaf
00202             if (1 == i) {
00203                 kpos    = m_bp_support.find_open(ckpos);
00204                 return m_bp_support.rank(kpos)-1;
00205             } else { // i > 1
00206                 size_type r = ckpos - m_bp_support.rank(ckpos); // numbers of closing parentheses - 1 = index of first child in m_first_child
00207                 if (r+1 >= i) { // if there exist more than i l-indices
00208                     // check if m_first_child[r-i+1..r-1] consists of zeros
00209                     const uint64_t* p = m_first_child.data() + (r>>6);
00210                     uint8_t offset = r&0x3F;
00211 
00212                     uint64_t w = (*p) & bit_magic::Li1Mask[offset];
00213                     if (w) {
00214                         children = offset - bit_magic::l1BP(w) + 1;
00215                     } else if (m_first_child.data() == p) { // w==0 and we are in the first word
00216                         children = offset + 2; // da bit_magic::l1BP(w)=-1 sein muesste
00217                     } else {
00218                         children = offset + 2;
00219                         while (p > m_first_child.data()) {
00220                             w = *(--p);
00221                             if (0==w)
00222                                 children += 64;
00223                             else {
00224                                 children += (63-bit_magic::l1BP(w));
00225                                 break;
00226                             }
00227                         }
00228                         children += (w==0);
00229                     }
00230                     if (i < children) {  // there exists an ith l-index
00231                         ckpos -= (i-1);
00232                         assert(m_bp[ckpos] == 0);
00233                         kpos   = m_bp_support.find_open(ckpos);
00234                         return m_bp_support.rank(kpos)-1;
00235                     }
00236                 }
00237                 // if i >= children
00238                 kpos = node.jp1pos;
00239                 if (kpos < m_bp.size())
00240                     ckpos = m_bp_support.find_close(kpos);
00241                 else
00242                     ckpos = m_bp.size();
00243                 return node.j+1;
00244             }
00245         }
00246 
00247         // Get the postion of the first l index of a l-[i,j] interval in the balanced parentheses sequence.
00248         /* \par Time complexity
00249          *              \f$ \Order{1} \f$
00250          */
00251         inline size_type get_pos_of_closing_parenthesis_of_the_first_l_index(const node_type& node)const {
00252             if (node.cipos > node.jp1pos) { // corresponds to m_lcp[i] <= m_lcp[j+1]
00253                 return node.jp1pos-1;
00254             } else { // corresponds to m_lcp[i] > m_lcp[j+1]
00255                 return node.cipos-1;
00256             }
00257         }
00258 
00259         // Get the next smaller value.
00260         /* \par Time complexity
00261          *      \f$ \Order{1} \f$
00262          */
00263         // Returns n if there is no next smaller value in [i+1..n-1]
00264         inline size_type nsv(size_type i, size_type ipos)const { // possible optimization: calculate also position of nsv, i.e. next ( following position cipos
00265             size_type cipos = m_bp_support.find_close(ipos);
00266             size_type result = m_bp_support.rank(cipos);
00267             return result;
00268         }
00269 
00270         // Get the previous smaller value.
00271         /*
00272          * \par Time complexity
00273          *    \f$ \Order{\frac{\sigma}{w}} \f$, where w=64 is the word size, can be implemented in \f$\Order{1}\f$ with rank and select
00274          */
00275         inline size_type psv(size_type i, size_type ipos, size_type cipos, size_type& psvpos, size_type& psvcpos)const {
00276             if (cipos >= m_bp.size() - m_sigma) { // if lcp[i]==0 => psv is the 0th index by definition
00277                 psvpos = 0;
00278                 psvcpos = m_bp.size()-1;
00279                 return 0;
00280             }
00281             size_type r0 = cipos - m_bp_support.rank(cipos); // index of clothing parenthesis in first_child bp
00282             size_type next_first_child = 0;
00283             const uint64_t* p = m_first_child.data() + (r0>>6);
00284             uint64_t w = (*p) >> (r0&0x3F);
00285             if (w) { // if w!=0
00286                 next_first_child = cipos + bit_magic::r1BP(w);
00287                 if (cipos == next_first_child and m_bp[next_first_child+1]) {
00288                     psvpos = m_bp_support.enclose(ipos);
00289                     psvcpos = m_bp_support.find_close(psvpos);
00290                     return m_bp_support.rank(psvpos)-1;
00291                 }
00292             } else {
00293                 cipos += 64-(r0&0x3F);
00294                 ++p;
00295                 while (!(w=*p)) { // while w==0
00296                     ++p;
00297                     cipos += 64;
00298                 }
00299                 next_first_child = cipos + bit_magic::r1BP(w);
00300             }
00301             if (!m_bp[next_first_child+1]) { // if next parenthesis is a closing one
00302                 psvcpos = next_first_child+1;
00303                 psvpos = m_bp_support.find_open(psvcpos);
00304                 return m_bp_support.rank(psvpos)-1;
00305             } else {
00306                 psvpos = m_bp_support.enclose(m_bp_support.find_open(next_first_child));
00307                 psvcpos = m_bp_support.find_close(psvpos);
00308                 return m_bp_support.rank(psvpos)-1;
00309             }
00310         }
00311 
00312         // Range minimum query based on the rr_enclose method.
00313         /* \par Time complexity
00314          *     \f$ \Order{\rrenclose} \f$
00315          */
00316         inline size_type rmq(size_type l, size_type r)const {
00317             size_type i         = m_bp_support.select(l+1);
00318             size_type j         = m_bp_support.select(r+1);
00319             size_type fc_i      = m_bp_support.find_close(i);
00320             if (j < fc_i) { // i < j < find_close(j) < find_close(i)
00321                 return l;
00322             } else { // i < find_close(i) < j < find_close(j)
00323                 size_type ec = m_bp_support.rr_enclose(i,j);
00324                 if (ec == m_bp_support.size()) {// no restricted enclosing pair found
00325                     return r;
00326                 } else { // found range restriced enclosing pair
00327                     return m_bp_support.rank(ec)-1; // subtract 1, as the index is 0 based
00328                 }
00329             }
00330         }
00331 
00332     public:
00333         const Csa& csa;                         
00334         const lcp_type& lcp;                            
00335         const bit_vector& bp;                   
00336         const bp_support_type& bp_support;      
00337 
00338         const bit_vector& first_child_bv;
00339         const fc_rank_support_type& first_child_rank;
00340 
00342         /* @{ */
00343 
00345         cst_sct3p(): csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), first_child_bv(m_first_child), first_child_rank(m_first_child_rank) {}
00346 
00347         // Constructor for the cst_sct3p taking a string for that the compressed suffix tree should be calculated.
00348         /*
00349          * \param str Text for which the \f$ \CST \f$ should be constructed. The text should be terminated by a zero byte.
00350          * \pre The text has to be terminated by a zero byte.
00351          */
00352 //              cst_sct3p(const unsigned char *str);
00353 
00354         template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1, uint8_t int_width_2, class size_type_class_2>
00355         cst_sct3p(const std::string& cst_file_name,
00356                   int_vector_file_buffer<int_width, size_type_class>& lcp_buf,
00357                   int_vector_file_buffer<int_width_1, size_type_class_1>& sa_buf,
00358                   int_vector_file_buffer<int_width_2, size_type_class_2>& isa_buf,
00359                   std::string dir,
00360                   bool build_ony_bps);
00361 
00362         cst_sct3p(tMSS& file_map, const std::string& dir, const std::string& id, bool build_only_bps);
00363 
00365 
00370         cst_sct3p(const cst_sct3p& cst):csa(m_csa),lcp(m_lcp),bp(m_bp),bp_support(m_bp_support),first_child_bv(m_first_child), first_child_rank(m_first_child_rank) {
00371             copy(cst);
00372         }
00373 
00374         /* @} */
00375 
00377         ~cst_sct3p() {}
00378 
00379         void construct(tMSS& file_map, const std::string& dir, const std::string& id, bool build_only_bps);
00380 
00382 
00385         size_type size()const {
00386             return m_bp.size()>>1;
00387         }
00388 
00390 
00393         static size_type max_size() {
00394             return Csa::max_size();
00395         }
00396 
00398 
00401         bool empty()const {
00402             return m_csa.empty();
00403         }
00404 
00406 
00414         void swap(cst_sct3p& cst);
00415 
00417 
00420         const_iterator begin()const {
00421             if (0 == m_bp.size())  // special case: tree is uninitialized
00422                 return end();
00423             else if (2 == m_bp.size()) { // special case: the root is a leaf
00424                 return const_iterator(this, root(), true, true);
00425             }
00426             return const_iterator(this, root(), false, true);
00427         };
00428 
00430 
00433         const_iterator end()const {
00434             return const_iterator(this, root(), true, false);
00435         }
00436 
00438         const_bottom_up_iterator begin_bottom_up()const {
00439             if (0 == m_bp.size())  // special case: tree is uninitialized
00440                 return end_bottom_up();
00441             return const_bottom_up_iterator(this, leftmost_leaf_in_the_subtree(root()));
00442         }
00443 
00445         const_bottom_up_iterator end_bottom_up()const {
00446             return const_bottom_up_iterator(this, root(), false);
00447         }
00448 
00450 
00453         cst_sct3p& operator=(const cst_sct3p& cst);
00454 
00456 
00461         bool operator==(const cst_sct3p& cst)const;
00462 
00464 
00469         bool operator!=(const cst_sct3p& cst)const;
00470 
00472 
00475         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00476 
00478 
00480         void load(std::istream& in);
00481 
00483         /* @{ */
00484 
00486 
00490         node_type root() const {
00491             return node_type(0, m_bp.size()-1, m_bp.size());
00492         }
00493 
00495 
00501         bool is_leaf(const node_type& v)const {
00502             return v.i==v.j;
00503         }
00504 
00506 
00513         node_type ith_leaf(size_type i)const {
00514             assert(i > 0 and i <= size());
00515             size_type ipos = m_bp_support.select(i);
00516             size_type jp1pos;
00517             if (i == size())
00518                 jp1pos = m_bp.size();
00519             else if (m_bp[ipos+1])
00520                 jp1pos = ipos+1;
00521             else
00522                 jp1pos = m_bp_support.select(i+1);
00523             return node_type(ipos, m_bp_support.find_close(ipos), jp1pos);
00524         }
00525 
00527 
00534         size_type leaves_in_the_subtree(const node_type& v)const {
00535             return v.j-v.i+1;
00536         }
00537 
00539 
00544         node_type leftmost_leaf_in_the_subtree(const node_type& v)const {
00545             return ith_leaf(v.i+1);
00546         }
00547 
00549 
00554         node_type rightmost_leaf_in_the_subtree(const node_type& v)const {
00555             return ith_leaf(v.j+1);
00556         }
00557 
00559 
00566         size_type lb(const node_type& v)const {
00567             return v.i;
00568         }
00569 
00571 
00578         size_type rb(const node_type& v)const {
00579             return v.j;
00580         }
00581         /*
00582                         bool lcp_value_equals_zero(size_type i)const{
00583                                 size_type ipos = m_bp_support.find_close(m_bp_support.select(i+1));
00584                                 return ipos >= (m_bp.size() - m_sigma);
00585                         }
00586         */
00587 
00589 
00594         node_type parent(const node_type& v) const {
00595 //std::cout<<"parent "<<depth(v)<<v<<std::endl;
00596 #ifndef NDEBUG
00597 //                      std::cout<<"parent interval of "<<v.l<<"-["<<v.left<<" "<<v.right<<"]"<<std::endl;
00598 #endif
00599             if (v.cipos > v.jp1pos) { // LCP[i] <= LCP[j+1]
00600                 size_type psv_pos, psv_cpos, psv_v, nsv_v, nsv_p1pos;
00601                 psv_v = psv(v.j+1, v.jp1pos, m_bp_support.find_close(v.jp1pos), psv_pos, psv_cpos);
00602                 nsv_v = nsv(v.j+1, v.jp1pos)-1;
00603                 if (nsv_v == size()-1) {
00604                     nsv_p1pos = m_bp.size();
00605                 } else { // nsv_v < size()-1
00606                     nsv_p1pos = m_bp_support.select(nsv_v+2);
00607                 }
00608                 return node_type(psv_v, nsv_v, psv_pos, psv_cpos, nsv_p1pos);
00609             } else { // LCP[i] > LCP[j+1]
00610                 size_type psv_pos, psv_cpos, psv_v;
00611                 psv_v = psv(v.i, v.ipos, v.cipos, psv_pos, psv_cpos);
00612                 return node_type(psv_v, v.j, psv_pos, psv_cpos, v.jp1pos);
00613             }
00614         }
00615 
00617 
00623         node_type sibling(const node_type& v)const {
00624 //std::cout<<"sibling "<<depth(v)<<v<<std::endl;
00625 // Vorgehen:(1) Bestimmen, ob v noch einen rechten Bruder hat. Entsp. parent hat gleicht rechte Grenze wie v. Speziallfall rechter Rand ist hier schon behandelt!
00626             if (v.cipos < v.jp1pos) { // LCP[i] > LCP[j+1] => v hat die selbe rechte Grenze wie parent(v) => kein rechter Bruder
00627                 return root();
00628             }
00629 //          (2) Es existiert ein rechter Bruder, LCP[j+1] >= LCP[i] und j>i
00630             // es gilt nun: v.cipos > v.jp1pos
00631             size_type cjp1posm1 = m_bp_support.find_close(v.jp1pos)-1; // v.cipos-2 ???
00632 //std::cerr<<"cjp1posm1="<<cjp1posm1<<std::endl;
00633 //                      size_type cjp1posm1 = v.cipos-1;
00634 //                      if( cjp1posm1+1 != v.cipos ){
00635 //                              std::cout<<v<<" "<<cjp1posm1+1<<" "<<v.cipos<<std::endl;
00636 //                      }
00637 //                      size_type cjp1posm1 = v.cipos-2;
00638             // an der stelle von cjp1posm1 steht 1, falls v das letzte Kind ist
00639             bool last_child = m_bp[cjp1posm1];
00640             // an der stelle von cjp1posm1 steht 0 -> entweder nicht letztes Kind oder letztes Kind
00641             if (!last_child) {
00642                 size_type first_child_idx = cjp1posm1 - m_bp_support.rank(cjp1posm1);
00643                 last_child = m_first_child[first_child_idx]; // if first_child indicator is true => the new sibling is the rightmost sibling
00644             }
00645             if (last_child) {
00646                 size_type nsv_v = nsv(v.j+1, v.jp1pos)-1, nsv_p1pos;
00647                 if (nsv_v == size()-1) {
00648                     nsv_p1pos = m_bp.size();
00649                 } else {
00650                     nsv_p1pos = m_bp_support.select(nsv_v+2);
00651                 }
00652                 return node_type(v.j+1, nsv_v, v.jp1pos, m_bp_support.find_close(v.jp1pos), nsv_p1pos);
00653             } else {
00654 //std::cerr<<"not last child: "<<std::endl;
00655                 /*
00656                                         if( cjp1posm1+1 != v.cipos-1 ){
00657                                                 std::cout<<v<<" "<<cjp1posm1+1<<" "<<v.cipos<<std::endl;
00658                                                 std::cout<<"LCP["<<v.i<<"]="<<m_lcp[v.i]<<" "<<"LCP["<<v.j<<"]="<<m_lcp[v.j];
00659                                                 if(v.j<size())
00660                                                         std::cout<<" LCP[j+1]="<<m_lcp[v.j+1]<<std::endl;
00661                                         }
00662                 */
00663 //std::cerr<<"find_open(cjp1posm1) = "<< m_bp_support.find_open(cjp1posm1) << std::endl;
00664                 size_type new_j = m_bp_support.rank(m_bp_support.find_open(cjp1posm1))-2;
00665                 return node_type(v.j+1, new_j, v.jp1pos, m_bp_support.find_close(v.jp1pos), m_bp_support.select(new_j+2));
00666             }
00667         }
00668 
00670 
00679         node_type ith_child(const node_type& v, size_type i)const {
00680 //                      std::cerr<<i<<"th child "<<depth(v)<<v<<" i="<<i<<std::endl;
00681             assert(i > 0);
00682             if (is_leaf(v))  // if v is a leave, v has no child
00683                 return root();
00684             if (1 == i) {
00685                 size_type k = 0, kpos = 0, k_find_close = 0;
00686                 // v is not a leave: v has at least two children
00687                 k = get_first_l_index(v, kpos, k_find_close);// get first l-index k and the position of k
00688                 return node_type(v.i, k-1, v.ipos, v.cipos, kpos);
00689             } else { // i > 1
00690                 size_type k1, kpos1, k_find_close1;
00691                 k1 = get_ith_l_index(v, i-1, kpos1, k_find_close1);
00692                 if (k1 == v.j+1)
00693                     return root();
00694                 size_type k2, kpos2, k_find_close2;
00695                 k2 = get_ith_l_index(v, i, kpos2, k_find_close2);
00696                 return node_type(k1, k2-1, kpos1, k_find_close1, kpos2);
00697             }
00698         }
00699 
00701 
00707         size_type degree(const node_type& v)const {
00708             if (is_leaf(v))  // if v is a leave, v has no child
00709                 return 0;
00710             // v is not a leave: v has at least two children
00711             size_type r = get_pos_of_closing_parenthesis_of_the_first_l_index(v);
00712             /*                  if( m_bp[r-1] ){// if there exists no next l-index
00713                                         return 2;
00714                                 }
00715             */
00716             size_type r0 = r - m_bp_support.rank(r);
00717             const uint64_t* p = m_first_child.data() + (r0>>6);
00718             uint8_t offset = r0&0x3F;
00719 
00720             uint64_t w = (*p) & bit_magic::Li1Mask[offset];
00721             if (w) {
00722                 return offset-bit_magic::l1BP(w)+1;
00723             } else if (m_first_child.data() == p) { // w==0 and we are in the first word
00724                 return offset+2; // da bit_magic::l1BP(w)=-1 sein muesste
00725             } else {
00726                 size_type res = offset+2;
00727                 while (p > m_first_child.data()) {
00728                     w = *(--p);
00729                     if (0==w)
00730                         res += 64;
00731                     else {
00732                         return res + (63-bit_magic::l1BP(w));
00733                     }
00734                 }
00735                 return res + (w==0);
00736             }
00737         }
00738 
00739 
00740 
00741         // Returns the next sibling of node v.
00742         // Only for tests.
00743         node_type sibling_naive(const node_type& v)const {
00744             if (v==root())
00745                 return root();
00746             node_type parent = this->parent(v);
00747             assert(parent != v);
00748             size_type nr = degree(parent);
00749             for (size_type i=1; i <= nr; ++i)
00750                 if (ith_child(parent, i) == v and i!=nr)
00751                     return ith_child(parent, i+1);
00752             return root();
00753         }
00754 
00755 
00757 
00765         // TODO const unsigned char c durch char_type ersetzen
00766         node_type child(const node_type& v, const unsigned char c, size_type& char_pos)const {
00767             if (is_leaf(v))  // if v is a leaf = (), v has no child
00768                 return root();
00769             // else v = ( (     ))
00770             uint16_t cc = m_csa.char2comp[c];
00771             if (cc==0 and c!=0) // TODO: aendere char2comp so ab, dass man diesen sonderfall nicht braucht
00772                 return root();
00773             size_type char_ex_max_pos = m_csa.C[cc+1], char_inc_min_pos = m_csa.C[cc];
00774 
00775             size_type d                 = depth(v);
00776 
00777 //                      (1) check the first child
00778 //                      char_pos = m_csa(m_csa[v.i]+d);  // replaced by the next line
00779             char_pos = get_char_pos(v.i, d, m_csa);
00780             if (char_pos >= char_ex_max_pos) {// the first character of the first child interval is lex. greater than c
00781                 // => all other first characters of the child intervals are also greater than c => no solution
00782                 return root();
00783             } else if (char_pos >= char_inc_min_pos) { // i.e. char_pos < char_ex_max_pos and char_pos >= char_inc_min_pos
00784                 return ith_child(v, 1);
00785             }
00786 
00787             size_type child_cnt         = degree(v);
00788 
00789 //                      (2) check the last child
00790 //                      char_pos = m_csa(m_csa[v.j]+d); // replaced by the next line
00791             char_pos = get_char_pos(v.j, d, m_csa);
00792             if (char_pos < char_inc_min_pos) {// the first character of the last child interval is lex. smaller than c
00793                 // =>   all other first characters of the child intervals are also smaller than c => no solution
00794                 return root();
00795             } else if (char_pos < char_ex_max_pos) { // i.e. char_pos < char_ex_max_pos and char_pos >= char_inc_min_pos
00796                 return ith_child(v, child_cnt);
00797             }
00798 
00799 //                      (3) binary search for c in the children [2..children)
00800             size_type l_bound = 2, r_bound = child_cnt, mid, kpos, ckpos, l_index;
00801             while (l_bound < r_bound) {
00802                 mid = (l_bound + r_bound) >> 1;
00803 
00804                 l_index = get_ith_l_index(v, mid-1, kpos, ckpos);
00805 //                              char_pos = m_csa(m_csa[l_index]+d); // replaced by the next line
00806                 char_pos = get_char_pos(l_index, d, m_csa);
00807 
00808                 if (char_inc_min_pos > char_pos) {
00809                     l_bound = mid+1;
00810                 } else if (char_ex_max_pos <= char_pos) {
00811                     r_bound = mid;
00812                 } else { // char_inc_min_pos <= char_pos < char_ex_max_pos => found child
00813                     // we know that the child is not the last child, see (2)
00814                     // find next l_index: we know that a new l_index exists: i.e. assert( 0 == m_bp[ckpos-1]);
00815                     size_type lp1_index = m_bp_support.rank(m_bp_support.find_open(ckpos-1))-1;
00816                     size_type jp1pos = m_bp.size();
00817                     if (lp1_index-1 < size()-1) {
00818                         jp1pos = m_bp_support.select(lp1_index+1);
00819                     }
00820                     return node_type(l_index, lp1_index-1, kpos, ckpos, jp1pos);
00821                 }
00822             }
00823             return root();
00824         }
00825 
00827         // \sa child(node_type v, const unsigned char c, size_type &char_pos)
00828         node_type child(const node_type& v, const unsigned char c) {
00829             size_type char_pos;
00830             return child(v, c, char_pos);
00831         }
00832 
00834 
00841         unsigned char edge(const node_type& v, size_type d)const {
00842 #ifndef NDEBUG
00843             if (d < 1 or d > depth(v)) {
00844                 throw std::out_of_range("OUT_OF_RANGE_ERROR: "+util::demangle(typeid(this).name())+"cst_sct3p<>::edge(node_type v, size_type d). d == 0 or d > depth(v)!");
00845             }
00846 #endif
00847 //                      size_type order = m_csa(m_csa[v.i]+d-1); replaced by the next line
00848             size_type order = get_char_pos(v.i, d-1, m_csa);
00849             uint16_t c_begin = 1, c_end = m_sigma+1, mid;
00850             while (c_begin < c_end) {
00851                 mid = (c_begin+c_end)>>1;
00852                 if (m_csa.C[mid] <= order) {
00853                     c_begin = mid+1;
00854                 } else {
00855                     c_end = mid;
00856                 }
00857             }
00858             return m_csa.comp2char[c_begin-1];
00859         }
00860 
00862 
00870         node_type lca(node_type v, node_type w)const {
00871             if (v.i > w.i or (v.i == w.i and v.j < w.j)) {
00872                 std::swap(v, w);
00873             }
00874             assert(v.i < w.i or (v.i==w.i and v.j >= j));
00875             assert(!(v.i < w.i and v.j > w.i and v.j < w.j));   // assert that v and w do not overlapp
00876             if (v.j >= w.j) { // v encloses w or v==w
00877                 return v;
00878             } else { // v.i < v.j < w.i < w.j
00879                 size_type min_index = rmq(v.i+1, w.j);
00880                 size_type min_index_pos         = m_bp_support.select(min_index+1);
00881                 size_type min_index_cpos        = m_bp_support.find_close(min_index_pos);
00882 
00883                 if (min_index_cpos >= m_bp.size() - m_sigma) { // if lcp[min_index]==0 => return root
00884                     return root();
00885                 }
00886                 size_type new_j = nsv(min_index, min_index_pos)-1;
00887                 size_type new_ipos, new_icpos;
00888                 size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
00889                 size_type jp1pos = m_bp.size();
00890                 if (new_j < size()-1) {
00891                     jp1pos = m_bp_support.select(new_j+2);
00892                 }
00893                 return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
00894             }
00895         }
00896 
00898 
00904         size_type depth(const node_type& v)const {
00905             if (v.i == v.j) {
00906                 return size()-m_csa[v.i];
00907             } else if (v == root()) {
00908                 return 0;
00909             } else {
00910                 size_type kpos, ckpos;
00911                 size_type l = get_first_l_index(v, kpos, ckpos);
00912                 return m_lcp[l];
00913             }
00914         }
00915 
00917 
00923         // TODO: can be implemented in O(1) with o(n) space. See Jansson, Sadakane, Sung, SODA 2007, "Ultra-succinct Representation of Ordered Trees"
00924         size_type node_depth(node_type v)const {
00925             size_type d = 0;
00926             while (v != root()) {
00927                 ++d;
00928                 v = parent(v);
00929             }
00930             return d;
00931         }
00932 
00934 
00940         node_type sl(const node_type& v)const {
00941             if (v == root())
00942                 return root();
00943             // get interval with first char deleted
00944             size_type i  = m_csa.psi[v.i];
00945             if (is_leaf(v)) {
00946                 if (v.i==0 and v.j==0) // if( v.l==1 )
00947                     return root();
00948                 else
00949                     return ith_leaf(i+1);
00950             }
00951             size_type j  = m_csa.psi[v.j];
00952             assert(i < j);
00953             size_type min_index = rmq(i+1, j); // rmq
00954             size_type min_index_pos     = m_bp_support.select(min_index+1);
00955             size_type min_index_cpos    = m_bp_support.find_close(min_index_pos);
00956             if (min_index_cpos >= m_bp.size() - m_sigma) { // if lcp[min_index]==0 => return root
00957                 return root();
00958             }
00959             size_type new_j = nsv(min_index, min_index_pos)-1;
00960             size_type new_ipos, new_icpos;
00961             size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
00962             size_type jp1pos = m_bp.size();
00963             if (new_j < size()-1) {
00964                 jp1pos = m_bp_support.select(new_j+2);
00965             }
00966             return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
00967         }
00968 
00969 
00971         /*
00972          *  \param v A valid not of a cst_sct3p.
00973          *  \param c The character which should be prepended to the string of the current node.
00974          *      \return root() if the Weiner link of (v, c) does not exist, otherwise the Weiner link is returned.
00975          *  \par Time complexity
00976          *              \f$ \Order{ t_{rank\_bwt} } \f$
00977          *
00978          */
00979         node_type wl(const node_type& v, const unsigned char c) const {
00980             size_type c_left    = m_csa.rank_bwt(v.i, c);
00981             size_type c_right   = m_csa.rank_bwt(v.j+1, c);
00982             if (c_left == c_right)  // there exists no Weiner link
00983                 return root();
00984             if (c_left+1 == c_right)
00985                 return ith_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
00986             else {
00987                 size_type left  = m_csa.char2comp[c] + c_left;
00988                 size_type right = m_csa.char2comp[c] + c_right - 1;
00989                 assert(left < right);
00990 
00991                 size_type ipos = m_bp_support.select(left+1);
00992                 size_type jp1pos = m_bp.size();
00993                 if (right < size()-1) {
00994                     jp1pos = m_bp_support.select(right+2);
00995                 }
00996                 return node_type(left, right, ipos, m_bp_support.find_close(ipos), jp1pos);
00997             }
00998         }
00999 
01001 
01006         size_type sn(const node_type& v)const {
01007             assert(is_leaf(v));
01008             return m_csa[v.i];
01009         }
01010 
01012 
01018         size_type id(const node_type& v)const {
01019             if (is_leaf(v)) { // return i in the range from 0..csa.size()-1
01020                 return v.i;
01021             }
01022             size_type clpos; // closing parentheses of the l-index
01023             if (v.cipos > v.jp1pos) { // corresponds to m_lcp[i] <= m_lcp[j+1]
01024                 clpos   = v.jp1pos-1;
01025             } else { // corresponds to m_lcp[i] > m_lcp[j+1]
01026                 clpos   = v.cipos-1;
01027             }
01028             assert(m_bp[clpos]==0);
01029             size_type r0clpos = clpos-m_bp_support.rank(clpos);
01030 
01031             return size()+m_first_child_rank(r0clpos);
01032         }
01033 
01035         size_type nodes()const {
01036             return m_nodes;
01037         }
01038 
01040         /* \param lb Left bound of the lcp-interval [lb..rb] (inclusive).
01041          * \param rb Right bound of the lcp-interval [lb..rb] (inclusive).
01042          * \param l  Depth of the lcp-interval.
01043          *\ return The node in the suffix tree corresponding lcp-interval [lb..rb]
01044          */
01045         node_type node(size_type lb, size_type rb, size_type l=0) const {
01046             size_type ipos = m_bp_support.select(lb+1);
01047             size_type jp1pos;
01048             if (rb == size()-1) {
01049                 jp1pos = m_bp.size();
01050             } else {
01051                 jp1pos = m_bp_support.select(rb+2);
01052             }
01053             return node_type(ipos, m_bp_support.find_close(ipos), jp1pos);
01054         }
01055 
01057 
01061         size_type tlcp_idx(size_type i) const {
01062             size_type ipos      = m_bp_support.select(i+1);
01063             size_type cipos = m_bp_support.find_close(ipos);
01064             return m_first_child_rank.rank(((ipos+cipos-1)>>1)-i);
01065         }
01066         /* @} */
01067 
01068 };
01069 
01070 // == template functions ==
01071 
01072 
01073 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01074 template<uint8_t int_width, class size_type_class, uint8_t int_width_1, class size_type_class_1, uint8_t int_width_2, class size_type_class_2>
01075 cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::cst_sct3p(const std::string& csa_file_name,
01076         int_vector_file_buffer<int_width, size_type_class>& lcp_buf,
01077         int_vector_file_buffer<int_width_1, size_type_class_1>& sa_buf,
01078         int_vector_file_buffer<int_width_2, size_type_class_2>& isa_buf,
01079         std::string dir="./",
01080         bool build_only_bps=false
01081                                                         ):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), first_child_bv(m_first_child), first_child_rank(m_first_child_rank)
01082 {
01083     std::string id =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str();
01084 
01085     write_R_output("cst", "construct BPS", "begin", 1, 0);
01086     m_nodes = algorithm::construct_supercartesian_tree_bp_succinct_and_first_child(lcp_buf, m_bp, m_first_child) + m_bp.size()/2;
01087     write_R_output("cst", "construct BPS", "end", 1, 0);
01088 
01089     if (!build_only_bps) {
01090         util::load_from_file(m_csa, csa_file_name.c_str());
01091 
01092         write_R_output("cst", "construct CLCP", "begin", 1, 0);
01093         construct_lcp(m_lcp, *this, lcp_buf, isa_buf);
01094         write_R_output("cst", "construct CLCP", "end", 1, 0);
01095     }
01096     write_R_output("cst", "construct BPSS", "begin", 1, 0);
01097     m_bp_support = Bp_support(&m_bp);
01098     util::init_support(m_first_child_rank, &m_first_child);
01099     write_R_output("cst", "construct BPSS", "end", 1, 0);
01100     m_sigma = degree(root());
01101 }
01102 
01103 
01104 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01105 cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::cst_sct3p(tMSS& file_map, const std::string& dir, const std::string& id, bool build_only_bps = false):csa(m_csa), lcp(m_lcp), bp(m_bp), bp_support(m_bp_support), first_child_bv(m_first_child), first_child_rank(m_first_child_rank)
01106 {
01107     construct(file_map, dir, id, build_only_bps);
01108 }
01109 
01110 
01111 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01112 void cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::construct(tMSS& file_map, const std::string& dir, const std::string& id, bool build_only_bps = false)
01113 {
01114     write_R_output("cst", "construct BPS", "begin", 1, 0);
01115     int_vector_file_buffer<> lcp_buf(file_map["lcp"].c_str());
01116     m_nodes = algorithm::construct_supercartesian_tree_bp_succinct_and_first_child(lcp_buf, m_bp, m_first_child) + m_bp.size()/2;
01117     write_R_output("cst", "construct BPS", "end", 1, 0);
01118     write_R_output("cst", "construct BPSS", "begin", 1, 0);
01119     m_bp_support = Bp_support(&m_bp);
01120     util::init_support(m_first_child_rank, &m_first_child);
01121     write_R_output("cst", "construct BPSS", "end", 1, 0);
01122 
01123     if (!build_only_bps) {
01124         write_R_output("cst", "construct CLCP", "begin", 1, 0);
01125         construct_lcp(m_lcp, *this, file_map, dir, id);
01126         write_R_output("cst", "construct CLCP", "end", 1, 0);
01127     }
01128     if (!build_only_bps) {
01129         util::load_from_file(m_csa, file_map["csa"].c_str());
01130     }
01131     m_sigma = degree(root());
01132 }
01133 
01134 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01135 typename cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::size_type cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
01136 {
01137     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
01138     size_type written_bytes = 0;
01139         written_bytes += m_csa.serialize(out, child, "csa");
01140     written_bytes += m_lcp.serialize(out, child, "lcp");
01141     written_bytes += m_bp.serialize(out, child, "bp");
01142     written_bytes += m_bp_support.serialize(out, child, "bp_support");
01143     written_bytes += m_first_child.serialize(out, child, "mark_child");
01144     written_bytes += m_first_child_rank.serialize(out, child, "mark_child_rank");
01145     written_bytes += util::write_member(m_sigma, out, child, "sigma");
01146     written_bytes += util::write_member(m_nodes, out, child, "node_cnt");
01147     structure_tree::add_size(child, written_bytes);
01148     return written_bytes;
01149 }
01150 
01151 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01152 void cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::load(std::istream& in)
01153 {
01154     m_csa.load(in);
01155     load_lcp(m_lcp, in, *this);
01156     m_bp.load(in);
01157     m_bp_support.load(in, &m_bp);
01158     m_first_child.load(in);
01159     m_first_child_rank.load(in, &m_first_child);
01160     util::read_member(m_sigma, in);
01161     util::read_member(m_nodes, in);
01162 #ifdef SDSL_DEBUG
01163     assert(algorithm::check_bp_support(m_bp, m_bp_support));
01164     std::cerr<<"checked bp_support"<<std::endl;
01165 #endif
01166 }
01167 
01168 template<class Csa, class Lcp, class Bp_support, class Rank_support>
01169 cst_sct3p<Csa, Lcp, Bp_support, Rank_support>& cst_sct3p<Csa, Lcp, Bp_support, Rank_support>::operator=(const cst_sct3p& cst)
01170 {
01171     if (this != &cst) {
01172         copy(cst);
01173     }
01174     return *this;
01175 }
01176 
01177 
01178 
01179 
01180 
01181 } // end namespace sdsl
01182 
01183 
01184 #endif