SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/test_index_performance.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDE_SDSL_TEST_INDEX_PERFORMANCE
00022 #define INCLUDE_SDSL_TEST_INDEX_PERFORMANCE
00023 
00024 #include "int_vector.hpp"       // for bit_vector and int_vector
00025 #include "testutils.hpp"        // for write_R_output 
00026 #include "util.hpp"                     // for 
00027 #include "algorithms.hpp"       // for backward_search
00028 #include "testutils.hpp"    // for file
00029 #include <cstdlib>                      // for rand 
00030 #include <algorithm>            // for swap
00031 #include <vector>                       // for std::vector      
00032 #include <iostream>
00033 
00034 namespace sdsl
00035 {
00036 
00038 /*
00039  */
00040 int_vector<64> get_rnd_positions(uint8_t log_s, uint64_t& mask, uint64_t m=0, uint64_t x=17);
00041 
00042 template<class Vector>
00043 void test_int_vector_random_access(const Vector& v, bit_vector::size_type times=100000000)
00044 {
00045     typedef bit_vector::size_type size_type;
00046     uint64_t mask;
00047     int_vector<64> rands = get_rnd_positions(20, mask, v.size());
00048     size_type cnt=0;
00049     write_R_output("int_vector","random access","begin",times,cnt);
00050     for (size_type i=0; i<times; ++i) {
00051         cnt += v[rands[ i&mask ]];
00052     }
00053     write_R_output("int_vector","random access","end",times,cnt);
00054 }
00055 
00056 template<class Vector>
00057 void test_int_vector_random_write(Vector& v, bit_vector::size_type times=100000000)
00058 {
00059     typedef bit_vector::size_type size_type;
00060     uint64_t mask;
00061     int_vector<64> rands = get_rnd_positions(20, mask, v.size());
00062     size_type cnt=0;
00063     write_R_output("int_vector","random write","begin",times,cnt);
00064     for (size_type i=0; i<times; ++i) {
00065         cnt += (v[rands[ i&mask ]] = i);
00066     }
00067     write_R_output("int_vector","random write","end",times,cnt);
00068 }
00069 
00070 template<class Vector>
00071 void test_int_vector_sequential_write(Vector& v, bit_vector::size_type times=100000000)
00072 {
00073     typedef bit_vector::size_type size_type;
00074     const uint64_t mask = (1ULL << bit_magic::l1BP(v.size()))-1;
00075 //      std::cout<<" mask="<<mask<<std::endl;
00076     size_type cnt=0;
00077     write_R_output("int_vector","seq write","begin",times,cnt);
00078     for (size_type i=0; i<times; ++i) {
00079         cnt += (v[i&mask] = i);
00080     }
00081     write_R_output("int_vector","seq write","end",times,cnt);
00082 }
00083 
00085 /*
00086  */
00087 template<class Rank>
00088 void test_rank_random_access(const Rank& rank, bit_vector::size_type times=20000000)
00089 {
00090     typedef bit_vector::size_type size_type;
00091     uint64_t mask;
00092     int_vector<64> rands = get_rnd_positions(20, mask, rank.size()+1);
00093     size_type cnt=0;
00094     write_R_output("rank","random access","begin",times,cnt);
00095     for (size_type i=0; i<times; ++i) {
00096         cnt += rank.rank(rands[ i&mask ]);
00097     }
00098     write_R_output("rank","random access","end",times,cnt);
00099 }
00100 
00102 /*
00103  * param size The size of the bit vector in bits for which to rank_support should be created
00104  */
00105 template<class Rank>
00106 void test_rank_construction(bit_vector::size_type size=838860800)
00107 {
00108     typedef bit_vector::size_type size_type;
00109     bit_vector b(size);
00110     util::set_random_bits(b, 17);
00111     std::cout<<"# bit vector size of rank construct : "<<size<<std::endl;
00112     write_R_output("rank","construct","begin",1,0);
00113     Rank rs(&b); // construct rank_support data structure
00114     write_R_output("rank","construct","end",1,rs(size));
00115 }
00116 
00117 
00118 
00120 /*
00121  */
00122 template<class Select>
00123 void test_select_random_access(const Select& select, bit_vector::size_type times=20000000)
00124 {
00125     typedef bit_vector::size_type size_type;
00126     const int s = 20;
00127     const uint64_t mask = (1<<s)-1;
00128     int_vector<64> rands(1<<s ,0);
00129     util::set_random_bits(rands, 17);
00130     size_type args = util::get_one_bits(*(select.v));
00131     util::all_elements_mod(rands, args);
00132     for (size_type i=0; i<rands.size(); ++i)
00133         rands[i] = rands[i]+1;
00134     size_type cnt=0;
00135     write_R_output("select","random access","begin",times,cnt);
00136     for (size_type i=0; i<times; ++i) {
00137         cnt += select.select(rands[ i&mask ]);
00138     }
00139     write_R_output("select","random access","end",times,cnt);
00140 }
00141 
00143 /*
00144  */
00145 template<class Select>
00146 void test_select_random_access(const Select& select, bit_vector::size_type args, bit_vector::size_type times)
00147 {
00148     typedef bit_vector::size_type size_type;
00149     const int s = 20;
00150     const uint64_t mask = (1<<s)-1;
00151     int_vector<64> rands(1<<s ,0);
00152     util::set_random_bits(rands, 17);
00153     util::all_elements_mod(rands, args);
00154     for (size_type i=0; i<rands.size(); ++i)
00155         rands[i] = rands[i]+1;
00156     size_type cnt=0;
00157     write_R_output("select","random access","begin",times,cnt);
00158     for (size_type i=0; i<times; ++i) {
00159         cnt += select.select(rands[ i&mask ]);
00160     }
00161     write_R_output("select","random access","end",times,cnt);
00162 }
00163 
00165 /*
00166  * param size The size of the bit vector in bits for which to rank_support should be created
00167  */
00168 template<class Select>
00169 void test_select_construction(bit_vector::size_type size=838860800)
00170 {
00171     typedef bit_vector::size_type size_type;
00172     bit_vector b(size);
00173     util::set_random_bits(b, 17);
00174     std::cout<<"# bit vector size of select construct : "<<size<<std::endl;
00175     write_R_output("select","construct","begin",1,0);
00176     Select sls(&b); // construct rank_support data structure
00177     write_R_output("select","construct","end",1, sls(1));
00178     std::cout<<"# size of the select_support_mcl :  "<< ((double)util::get_size_in_bytes(sls))/util::get_size_in_bytes(b) <<std::endl;
00179 }
00180 
00181 template<class Csa>
00182 void test_csa_access(const Csa& csa, typename Csa::size_type times=1000000)
00183 {
00184     typedef typename Csa::size_type size_type;
00185     size_type cnt=0;
00186     uint64_t mask;
00187     int_vector<64> rands = get_rnd_positions(20, mask, csa.size());
00188     write_R_output("csa","csa[]","begin",times,cnt);
00189     for (size_type i=0; i<times; ++i) {
00190         cnt += csa[ rands[ i&mask ] ];
00191     }
00192     write_R_output("csa","csa[]","end",times,cnt);
00193 }
00194 
00195 template<class Csa>
00196 void test_icsa_access(const Csa& csa, typename Csa::size_type times=1000000)
00197 {
00198     typedef typename Csa::size_type size_type;
00199     size_type cnt=0;
00200     uint64_t mask;
00201     int_vector<64> rands = get_rnd_positions(20, mask, csa.size());
00202     write_R_output("csa","csa()","begin",times,cnt);
00203     for (size_type i=0; i<times; ++i) {
00204         cnt += csa(rands[ i&mask ]);
00205     }
00206     write_R_output("csa","csa()","end",times,cnt);
00207 }
00208 
00209 // Test random access on \f$\Psi\f$ function
00210 template<class Csa>
00211 void test_psi_access(const Csa& csa, typename Csa::size_type times=1000000)
00212 {
00213     typedef typename Csa::size_type size_type;
00214     size_type cnt=0;
00215     uint64_t mask;
00216     int_vector<64> rands = get_rnd_positions(20, mask, csa.size());
00217     write_R_output("csa","psi[]","begin",times,cnt);
00218     for (size_type i=0; i<times; ++i) {
00219         cnt += csa.psi[rands[ i&mask ]];
00220     }
00221     write_R_output("csa","psi[]","end",times,cnt);
00222 }
00223 
00225 template<class Csa>
00226 void test_lf_access(const Csa& csa, typename Csa::size_type times=1000000)
00227 {
00228     typedef typename Csa::size_type size_type;
00229     size_type cnt=0;
00230     uint64_t mask;
00231     int_vector<64> rands = get_rnd_positions(20, mask, csa.size());
00232     write_R_output("csa","psi()","begin",times,cnt);
00233     for (size_type i=0; i<times; ++i) {
00234         cnt += csa.psi(rands[ i&mask ]);
00235     }
00236     write_R_output("csa","psi()","end",times,cnt);
00237 }
00238 
00240 template<class Csa>
00241 void test_bwt_access(const Csa& csa, typename Csa::size_type times=1000000)
00242 {
00243     typedef typename Csa::size_type size_type;
00244     uint64_t mask;
00245     int_vector<64> rands = get_rnd_positions(20, mask, csa.size());
00246     size_type cnt=0;
00247     write_R_output("csa","bwt[]","begin",times,cnt);
00248     for (size_type i=0; i<times; ++i) {
00249         cnt += csa.bwt[ rands[ i&mask ] ];
00250     }
00251     write_R_output("csa","bwt[]","end",times,cnt);
00252 }
00253 
00255 
00260 template<class Csa>
00261 void test_pattern_matching(const Csa& csa,
00262                            const char* file_name,
00263                            const typename Csa::size_type pattern_len=20,
00264                            typename Csa::size_type times=1000000)
00265 {
00266     typedef typename Csa::size_type size_type;
00267     uint64_t mask;
00268     int_vector<64> rands = get_rnd_positions(15, mask, csa.size()-pattern_len);
00269     unsigned char* patterns = new unsigned char[rands.size()*pattern_len + 2];
00270     write_R_output("csa","extract patterns","begin",times,0);
00271 
00272     size_type file_size = 0;
00273     char* ccc = NULL;
00274     if ((file_size=file::read_text(file_name, ccc)) == 0) {
00275         throw std::logic_error("file " + std::string(file_name) + " has size 0 or could not be read");
00276     }
00277 
00278     for (size_type i=0; i<rands.size(); ++i) {
00279         for (size_type j=rands[i]; j < rands[i] + pattern_len; ++j)
00280             patterns[i*pattern_len + (j-rands[i])] = ccc[j];
00281     }
00282     delete [] ccc;
00283     write_R_output("csa","extract patterns","end",times,0);
00284 
00285     size_type cnt=0;
00286     write_R_output("csa","pattern_matching","begin",times,cnt);
00287     for (size_type i=0, l_res=0, r_res=0; i<times; ++i) {
00288         cnt += algorithm::backward_search(csa, 0, csa.size(),
00289                                           (patterns + ((i&mask)*pattern_len)),
00290                                           pattern_len, l_res, r_res);
00291     }
00292     write_R_output("csa","pattern_matching","end",times,cnt);
00293     delete [] patterns;
00294 }
00295 
00296 
00297 template<class Csa>
00298 void test_rank_bwt_access(const Csa& csa, typename Csa::size_type times=1000000)
00299 {
00300     typedef typename Csa::size_type size_type;
00301     size_type cnt=0;
00302     // precalc queries, distribution of chars is important!
00303     uint64_t mask, s=20;
00304     int_vector<64> rands = get_rnd_positions(s, mask, csa.size()+1);
00305     unsigned char* c_rands = new unsigned char[1<<s];
00306     for (size_type i=0; i<rands.size(); ++i)
00307         c_rands[i] = csa.bwt[ rands[i] ];
00308 
00309     write_R_output("csa","rank_bwt","begin",times,cnt);
00310     for (size_type i=0; i<times; ++i) {
00311         cnt += csa.rank_bwt(rands[ i&mask ], c_rands[ i&mask ]);
00312     }
00313     write_R_output("csa","rank_bwt","end",times,cnt);
00314     delete [] c_rands;
00315 }
00316 
00317 template<class Csa>
00318 void test_select_bwt_access(const Csa& csa, typename Csa::size_type times=500000)
00319 {
00320     typedef typename Csa::size_type size_type;
00321     size_type cnt=0;
00322 
00323     // precalc queries, distribution of chars is important!
00324     uint64_t mask, s=20;
00325     int_vector<64> rands = get_rnd_positions(s, mask, csa.size());
00326     unsigned char* c_rands = new unsigned char[1<<s];
00327     for (size_type i=0; i<rands.size(); ++i) {
00328         c_rands[i] = csa.bwt[ rands[i] ];
00329         rands[i] = csa.rank_bwt(rands[i]+1, c_rands[i]);
00330     }
00331     write_R_output("csa","select_bwt","begin",times,cnt);
00332     for (size_type i=0; i<times; ++i) {
00333 //              size_type c = i % csa.sigma;
00334 //              cnt += csa.select_bwt( (i % (csa.C[c+1]-csa.C[c])) + 1, csa.comp2char[c]);
00335         cnt += csa.select_bwt(rands[ i&mask ], c_rands[ i&mask]);
00336     }
00337     write_R_output("csa","select_bwt","end",times,cnt);
00338 }
00339 
00341 
00348 template<class Cst>
00349 void test_cst_dfs_iterator(Cst& cst, typename Cst::size_type times=100000)
00350 {
00351     if (times > cst.nodes())
00352         times = cst.nodes();
00353     typedef typename Cst::size_type size_type;
00354     size_type cnt=0;
00355     {
00356         // calc values for cnt
00357         typename Cst::const_iterator it = cst.begin();
00358         for (size_type i=0; i < std::min(times,(size_type)1000); ++i, ++it) {
00359             cnt += cst.depth(*it);
00360         }
00361     }
00362     write_R_output("cst","dfs","begin",times,cnt);
00363     typename Cst::const_iterator it = cst.begin();
00364     for (size_type i=0; i<times; ++i) {
00365         ++it;
00366     }
00367     write_R_output("cst", "dfs", "end", times, cnt + cst.depth(*it));
00368 }
00369 
00371 
00378 template<class Cst>
00379 void test_cst_dfs_iterator_and_depth(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
00380 {
00381     if (times > 2*cst.nodes()-cst.size())
00382         times = 2*cst.nodes()-cst.size();
00383     typedef typename Cst::size_type size_type;
00384     size_type cnt=0;
00385     write_R_output("cst","dfs and depth","begin",times,cnt);
00386     typename Cst::const_iterator it = cst.begin();
00387     if (!output) {
00388         for (size_type i=0; i<times; ++i, ++it) {
00389             if (!cst.is_leaf(*it))
00390                 cnt += cst.depth(*it);
00391         }
00392     } else {
00393         for (size_type i=0; i<times; ++i, ++it) {
00394             if (!cst.is_leaf(*it)) {
00395                 size_type d = cst.depth(*it);
00396                 std::cerr << d << "-[" << cst.lb(*it) << "," << cst.rb(*it) << "] ";
00397                 if (d < 60) {
00398                     for (int i=1; i<=d; ++i)
00399                         std::cerr<< cst.edge(*it, i);
00400                 }
00401                 std::cerr << std::endl;
00402                 cnt += d;
00403             }
00404         }
00405     }
00406     write_R_output("cst","dfs and depth","end",times,cnt);
00407 }
00408 
00410 
00417 template<class Cst>
00418 void test_cst_dfs_iterator_and_id(Cst& cst, typename Cst::size_type times=1000000, bool output=false)
00419 {
00420     if (times > 2*cst.nodes()-cst.size())
00421         times = 2*cst.nodes()-cst.size();
00422     typedef typename Cst::size_type size_type;
00423     size_type cnt=0;
00424     write_R_output("cst","dfs and id","begin",times,cnt);
00425     typename Cst::const_iterator it = cst.begin();
00426     if (!output) {
00427         for (size_type i=0; i<times; ++i, ++it) {
00428             cnt += cst.id(*it);
00429         }
00430     } else {
00431         for (size_type i=0; i<times; ++i, ++it) {
00432             size_type id = cst.id(*it);
00433             std::cerr << id << std::endl;
00434             cnt += id;
00435         }
00436     }
00437     write_R_output("cst","dfs and id","end",times,cnt);
00438 }
00439 
00440 
00441 
00442 // TODO: bottom up iterator
00443 
00445 template<class Lcp>
00446 void test_lcp_random_access(Lcp& lcp, typename Lcp::size_type times=10000000)
00447 {
00448     typedef typename Lcp::size_type size_type;
00449     size_type n = lcp.size();
00450     if (times > n)
00451         times = n;
00452     uint64_t mask;
00453     int_vector<64> rands = get_rnd_positions(20, mask, n);
00454     size_type cnt=0;
00455     write_R_output("lcp","random access","begin",times,cnt);
00456     for (size_type i=0; i<times; ++i) {
00457         cnt += lcp[ rands[ i&mask ] ];
00458     }
00459     write_R_output("lcp","random access","end",times,cnt);
00460 }
00461 
00463 
00468 template<class Lcp>
00469 void test_lcp_sequential_access(Lcp& lcp, typename Lcp::size_type times=10000000)
00470 {
00471     typedef typename Lcp::size_type size_type;
00472     size_type n = lcp.size();
00473     if (times > n)
00474         times = n;
00475     size_type cnt=0;
00476     write_R_output("lcp","sequential access","begin",times,cnt);
00477     for (size_type i=0; i<times; ++i) {
00478         cnt += lcp[ i ];
00479     }
00480     write_R_output("lcp","sequential access","end",times,cnt);
00481 }
00482 
00483 
00485 
00491 template<class Lcp>
00492 void test_lcp_random_sequential_access(Lcp& lcp, typename Lcp::size_type times=1000000, typename Lcp::size_type seq_len=64)
00493 {
00494     typedef typename Lcp::size_type size_type;
00495     size_type n = lcp.size();
00496     if (times > n)
00497         times = n;
00498     if (seq_len >= n)
00499         seq_len = n-1;
00500     const int s = 20;
00501     const uint64_t mask = (1<<s)-1;
00502     int_vector<64> rands(1<<s ,0);
00503     util::set_random_bits(rands, 17);
00504     for (int_vector<64>::size_type i=0; i < rands.size(); ++i) {
00505         rands[i] = rands[i] % n;
00506         if (rands[i] + seq_len >= n)
00507             rands[i] = n - 1 - seq_len;
00508     }
00509     size_type cnt=0;
00510     write_R_output("lcp","random sequential access","begin",times*(seq_len+1),cnt);
00511     for (size_type i=0; i<times; ++i) {
00512         for (size_type j=rands[i&mask], k=0; k<=seq_len; ++k, ++j) {
00513             cnt += lcp[ j ];
00514         }
00515     }
00516     write_R_output("lcp","random sequential access","end",times*(seq_len+1),cnt);
00517 }
00518 
00520 
00525 template<class Cst>
00526 void test_cst_parent_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
00527 {
00528     typedef typename Cst::size_type size_type;
00529     typedef typename Cst::node_type node_type;
00530 
00531     srand(x);
00532     size_type n = cst.csa.size();
00533     // take \f$ time \f$ random leaves
00534     std::vector<node_type> rand_leaf(times);
00535     for (size_type i=0; i<rand_leaf.size(); ++i) {
00536         rand_leaf[i] = cst.ith_leaf(1+ (rand() % n));
00537     }
00538 
00539     node_type p;
00540     size_type cnt=0;
00541     write_R_output("cst","parent","begin",times,cnt);
00542     for (size_type i=0; i<times; ++i, ++cnt) {
00543         p = cst.parent(rand_leaf[i]);
00544         while (p != cst.root()) {
00545             p = cst.parent(p);
00546             ++cnt;
00547         }
00548     }
00549     write_R_output("cst","parent","end",times,cnt);
00550 }
00551 
00552 
00554 
00560 template<class Cst>
00561 void generate_nodes_from_random_leaves(const Cst& cst, typename Cst::size_type times, std::vector<typename Cst::node_type>& nodes, uint64_t x=17)
00562 {
00563     typedef typename Cst::size_type size_type;
00564     typedef typename Cst::node_type node_type;
00565     srand(x);
00566     size_type n = cst.csa.size();
00567     // generate nodes
00568     for (size_type i=0; i<times; ++i) {
00569         node_type p = cst.ith_leaf(1+ (rand() % n));
00570         nodes.push_back(p);
00571         while (p != cst.root()) {
00572             p = cst.parent(p);
00573             nodes.push_back(p);
00574         }
00575     }
00576 }
00577 
00579 
00584 template<class Cst>
00585 void test_cst_child_operation(const Cst& cst, typename Cst::size_type times=5000, uint64_t x=17)
00586 {
00587     typedef typename Cst::size_type size_type;
00588     typedef typename Cst::node_type node_type;
00589 
00590     std::vector<node_type> nodes;
00591     generate_nodes_from_random_leaves(cst, times, nodes, x);
00592 //      for(size_type i=0; i<20; ++i){
00593 //              std::cout<< cst.lb(nodes[i])<<" "<<cst.rb(nodes[i])<<std::endl;
00594 //      }
00595     // choose some chars for the text
00596     unsigned char* letters = new unsigned char[nodes.size()+1];
00597     for (size_type i=0; i<nodes.size(); ++i) {
00598         letters[i] = cst.csa.bwt[i];
00599     }
00600 
00601     node_type c;  // for child node
00602     size_type char_pos=0;
00603     size_type cnt=0;
00604     write_R_output("cst","child","begin",nodes.size(),cnt);
00605     for (size_type i=0; i<nodes.size(); ++i) {
00606 //              if(i<20){
00607 //                      std::cout<<"i="<<i<<" vl="<<cst.lb(nodes[i])<<" rb="<<cst.rb(nodes[i])<<std::endl;
00608 //                      std::cout<<cst.csa[cst.lb(nodes[i])]<<" "<<cst.depth(nodes[i])<<std::endl;
00609 //              }
00610         c = cst.child(nodes[i], letters[i], char_pos);
00611         if (c==cst.root())
00612             ++cnt;
00613     }
00614     write_R_output("cst","child","end",nodes.size(),cnt);
00615     delete [] letters;
00616 }
00617 
00619 
00624 template<class Cst>
00625 void test_cst_1th_child_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17)
00626 {
00627     typedef typename Cst::size_type size_type;
00628     typedef typename Cst::node_type node_type;
00629 
00630     std::vector<node_type> nodes;
00631     generate_nodes_from_random_leaves(cst, times, nodes, x);
00632 
00633     node_type c;  // for 1th_child node
00634     size_type cnt=0;
00635     write_R_output("cst","1th_child","begin",nodes.size(),cnt);
00636     for (size_type i=0; i<nodes.size(); ++i) {
00637         c = cst.ith_child(nodes[i], 1);
00638         if (c==cst.root())
00639             ++cnt;
00640     }
00641     write_R_output("cst","1th_child","end",nodes.size(),cnt);
00642 }
00643 
00645 
00650 template<class Cst>
00651 void test_cst_sibling_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
00652 {
00653     typedef typename Cst::size_type size_type;
00654     typedef typename Cst::node_type node_type;
00655 
00656     std::vector<node_type> nodes;
00657     generate_nodes_from_random_leaves(cst, times, nodes, x);
00658     for (size_type i=0; i<nodes.size(); ++i) {
00659         nodes[i] = cst.sibling(nodes[i]);
00660     }
00661 
00662     node_type c;  // for sibling node
00663     size_type cnt=0;
00664     write_R_output("cst","sibling","begin",nodes.size(),cnt);
00665     for (size_type i=0; i<nodes.size(); ++i) {
00666         c = cst.sibling(nodes[i]);
00667         if (c==cst.root())
00668             ++cnt;
00669     }
00670     write_R_output("cst","sibling","end",nodes.size(),cnt);
00671 }
00672 
00674 template<class Cst>
00675 void test_cst_id_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
00676 {
00677     typedef typename Cst::size_type size_type;
00678     typedef typename Cst::node_type node_type;
00679     std::vector<node_type> nodes;
00680     generate_nodes_from_random_leaves(cst, times, nodes, x);
00681 
00682     size_type cnt = 0;
00683     write_R_output("cst","id","begin",nodes.size(),cnt);
00684     for (size_type i=0; i < nodes.size(); ++i) {
00685         cnt += cst.id(nodes[i]);
00686     }
00687     write_R_output("cst","id","end",nodes.size(),cnt);
00688 }
00689 
00691 template<class Cst>
00692 void test_cst_depth_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
00693 {
00694     typedef typename Cst::size_type size_type;
00695     typedef typename Cst::node_type node_type;
00696     std::vector<node_type> nodes;
00697     generate_nodes_from_random_leaves(cst, times, nodes, x);
00698 
00699     size_type cnt = 0;
00700     write_R_output("cst","depth","begin",nodes.size(),cnt);
00701     for (size_type i=0; i < nodes.size(); ++i) {
00702         cnt += cst.depth(nodes[i]);
00703     }
00704     write_R_output("cst","depth","end",nodes.size(),cnt);
00705 }
00706 
00707 
00709 template<class Cst>
00710 void test_cst_depth_operation_for_inner_nodes(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17)
00711 {
00712     typedef typename Cst::size_type size_type;
00713     typedef typename Cst::node_type node_type;
00714     std::vector<node_type> nodes;
00715     {
00716         std::vector<node_type> nodes2;
00717         generate_nodes_from_random_leaves(cst, times, nodes2, x);
00718         for (size_type i=0; i<nodes2.size(); ++i)
00719             if (!cst.is_leaf(nodes2[i])) {
00720                 nodes.push_back(nodes2[i]);
00721             }
00722     }
00723     size_type cnt = 0;
00724     write_R_output("cst","depth of inner nodes","begin",nodes.size(),cnt);
00725     for (size_type i=0; i < nodes.size(); ++i) {
00726         cnt += cst.depth(nodes[i]);
00727     }
00728     write_R_output("cst","depth of inner nodes","end",nodes.size(),cnt);
00729 }
00730 
00732 template<class Cst>
00733 void test_cst_lca_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17)
00734 {
00735     typedef typename Cst::size_type size_type;
00736     typedef typename Cst::node_type node_type;
00737     //  generate \f$2^{19}\f$ random pairs of leafs
00738     size_type n = cst.csa.size();
00739     uint64_t mask = (1<<20)-1;
00740     std::vector<node_type> nodes(1<<20);
00741     srand(x);
00742     for (size_type i=0; i < nodes.size(); ++i) {
00743         nodes[i] = cst.ith_leaf(rand()%n + 1);
00744     }
00745 
00746     size_type cnt=0;
00747     write_R_output("cst","lca","begin",times,cnt);
00748     for (size_type i=0; i<times; ++i) {
00749         node_type v = cst.lca(nodes[(2*i) & mask], nodes[(2*i+1) & mask]);
00750         if (v == cst.root())
00751             cnt++;
00752 //              if(i<30)
00753 //                      std::cout<<"lca("<<cst.lb(nodes[(2*i)&mask])<<","<<cst.lb(nodes[(2*i+1)&mask])<<")=("<<cst.lb(v)<<","<<cst.rb(v)<<")"<<std::endl;
00754     }
00755     write_R_output("cst","lca","end",times,cnt);
00756 }
00757 
00759 template<class Cst>
00760 void test_cst_sl_operation(const Cst& cst, typename Cst::size_type times=500, uint64_t x=17)
00761 {
00762     typedef typename Cst::size_type size_type;
00763     typedef typename Cst::node_type node_type;
00764     size_type n = cst.csa.size();
00765     if (times > n)
00766         times = n;
00767 
00768     std::vector<node_type> nodes(times);
00769     srand(x);
00770     // take \f$ times \f$ random leaves and calculate each parent
00771     for (size_type i=0; i<times; ++i) {
00772         nodes[i] = cst.parent(cst.ith_leaf(rand()%n + 1));
00773     }
00774 
00775     size_type cnt=0;
00776     times = 0;
00777     write_R_output("cst","sl","begin",0,cnt);
00778     for (size_type i=0; i<nodes.size(); ++i) {
00779         node_type v = nodes[i];
00780 //              std::cout<<"v="<<cst.lb(v)<<" "<<cst.rb(v)<<std::endl;
00781 //              size_type d = cst.depth(v);
00782         while (v != cst.root()) { // while v is not the root
00783             ++cnt;
00784             v = cst.sl(v); // follow suffix link
00785 //                      if( cnt < 30 ){
00786 //                              std::cout<< cnt << " " << cst.lb(v) << " " << cst.rb(v) << " " << cst.depth(v) << std::endl;
00787 //                      }
00788 //                      size_type d2 = cst.depth(v);
00789 //                      if( d != d2+1 ){
00790 //                              std::cout<<"error at cnt "<<cnt<<" d="<<d<<" d2="<<d2<<std::endl;
00791 //                      }
00792 //                      d = d2;
00793         }
00794     }
00795     write_R_output("cst","sl","end",cnt,cnt);
00796 }
00797 
00799 
00803 template<class Cst>
00804 void test_cst_matching_statistics(const Cst& cst, unsigned char* S2, typename Cst::size_type n2)
00805 {
00806     typedef typename Cst::size_type size_type;
00807     typedef typename Cst::node_type node_type;
00808 
00809     size_type cnt = 0;
00810     write_R_output("cst","mstats","begin",n2,cnt);
00811     size_type q  = 0;                                           // current match length
00812     size_type p2 = n2-1;              // position in S2
00813     size_type i  = 0, j = cst.csa.size()-1; // \f$ \epsilon \f$ matches all suffixes of S1
00814     while (p2+1 > 0) {
00815         size_type lb, rb;
00816         // perform backward search on interval \f$ [i,j] \f$
00817         size_type size = algorithm::backward_search(cst.csa, i, j, S2[p2], lb, rb);
00818         if (size > 0) {
00819             q = q + 1;
00820             i = lb; j = rb;
00821             p2 = p2 - 1;
00822         } else if (i==0 and j == cst.csa.size()) {
00823             p2 = p2 -1;
00824         } else {
00825             // map interval to a node of the cst and calculate parent
00826             node_type p = cst.parent(cst.node(i, j));
00827             q = cst.depth(p);   // update match length
00828             i = cst.lb(p);              // update left bound
00829             j = cst.rb(p);              // update right bound
00830         }
00831         cnt += q;
00832     }
00833     write_R_output("cst","mstats","end",n2,cnt);
00834 }
00835 
00836 // test the speed of find_close at random opening parentheses
00837 template<class Bps>
00838 void test_bps_find_close_and_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17)
00839 {
00840     typedef bit_vector::size_type size_type;
00841     uint64_t mask;
00842 //      uint64_t n = bps.size();
00843     int_vector<64> rands = get_rnd_positions(20, mask, bps.size());
00844     for (size_type i=0; i<rands.size(); ++i) {
00845         if (!b[rands[i]]) { // if there is no opening parentheses at position rands[i]
00846             rands[i] = bps.find_open(rands[i]);
00847         }
00848     }
00849     uint64_t cnt = 0;
00850     write_R_output("bps","find_close","begin",times, cnt);
00851     for (size_type i=0; i<times; ++i) {
00852         cnt += bps.find_close(rands[i&mask]);
00853     }
00854     write_R_output("bps","find_close","end",times, cnt);
00855 
00856     cnt = 0;
00857     write_R_output("bps","enclose rand","begin",times, cnt);
00858     for (size_type i=0; i<times; ++i) {
00859         cnt += bps.enclose(rands[i&mask]);
00860     }
00861     write_R_output("bps","enclose rand","end",times, cnt);
00862 
00863     cnt = 0;
00864     size_type cnt2=0;
00865     write_R_output("bps","enclose tree","begin",times, cnt);
00866     for (size_type i=0,enc, size=bps.size(); cnt2 < times; ++i) {
00867         enc = rands[i&mask];
00868         while ((enc = bps.enclose(enc)) != size) {
00869             cnt += enc;
00870             ++cnt2;
00871         }
00872         ++cnt2;
00873     }
00874     write_R_output("bps","enclose tree","end",times, cnt);
00875 }
00876 
00877 // test the speed of find_close at random opening parentheses
00878 template<class Bps>
00879 void test_bps_find_open(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17)
00880 {
00881     typedef bit_vector::size_type size_type;
00882     uint64_t mask;
00883     uint64_t n = bps.size();
00884     int_vector<64> rands = get_rnd_positions(20, mask, n);
00885     for (size_type i=0; i<rands.size(); ++i) {
00886         if (b[rands[i]]) {
00887             rands[i] = bps.find_close(rands[i]);
00888         }
00889     }
00890     uint64_t cnt = 0;
00891     write_R_output("bps","find_open","begin",times, cnt);
00892     for (size_type i=0; i<times; ++i) {
00893         cnt += bps.find_open(rands[i&mask]);
00894     }
00895     write_R_output("bps","find_open","end",times, cnt);
00896 }
00897 
00898 // test the speed of the double enclose method for random opening parentheses i and i+1
00899 template<class Bps>
00900 void test_bps_double_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17)
00901 {
00902     typedef bit_vector::size_type size_type;
00903     uint64_t mask;
00904     uint64_t n = bps.size();
00905     int_vector<64> rands = get_rnd_positions(20, mask, bps.size());
00906     for (size_type i=0; i<rands.size()/2; ++i) {
00907         if (!b[rands[2*i]]) { // if there is no opening parentheses at position rands[i]
00908             uint64_t pos = (rands[2*i]+1)%n;
00909             while (!b[pos] and pos != rands[2*i])  // go forward until we get an opening one
00910                 pos = (pos+1) % n;
00911             if (pos + 1000 > rands.size())
00912                 pos = 0;
00913             rands[2*i] = pos;
00914         }
00915         {
00916             uint64_t pos = (rands[2*i]+1)%n;
00917             while (!b[pos] and pos != rands[2*i])  // go forward until we get the next opening one
00918                 pos = (pos+1) % n;
00919             rands[2*i+1] = pos;
00920         }
00921     }
00922     uint64_t cnt = 0;
00923     write_R_output("bps","double_enclose","begin",times, cnt);
00924     for (size_type i=0; i<times; ++i) {
00925         cnt += bps.double_enclose(rands[(i*2)&mask], rands[(i*2+1)&mask]);
00926     }
00927     write_R_output("bps","double_enclose","end",times, cnt);
00928 }
00929 
00930 
00931 }// end namespace sdsl
00932 
00933 #endif