SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2010 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDE_SDSL_TEST_INDEX_PERFORMANCE 00022 #define INCLUDE_SDSL_TEST_INDEX_PERFORMANCE 00023 00024 #include "int_vector.hpp" // for bit_vector and int_vector 00025 #include "testutils.hpp" // for write_R_output 00026 #include "util.hpp" // for 00027 #include "algorithms.hpp" // for backward_search 00028 #include "testutils.hpp" // for file 00029 #include <cstdlib> // for rand 00030 #include <algorithm> // for swap 00031 #include <vector> // for std::vector 00032 #include <iostream> 00033 00034 namespace sdsl 00035 { 00036 00038 /* 00039 */ 00040 int_vector<64> get_rnd_positions(uint8_t log_s, uint64_t& mask, uint64_t m=0, uint64_t x=17); 00041 00042 template<class Vector> 00043 void test_int_vector_random_access(const Vector& v, bit_vector::size_type times=100000000) 00044 { 00045 typedef bit_vector::size_type size_type; 00046 uint64_t mask; 00047 int_vector<64> rands = get_rnd_positions(20, mask, v.size()); 00048 size_type cnt=0; 00049 write_R_output("int_vector","random access","begin",times,cnt); 00050 for (size_type i=0; i<times; ++i) { 00051 cnt += v[rands[ i&mask ]]; 00052 } 00053 write_R_output("int_vector","random access","end",times,cnt); 00054 } 00055 00056 template<class Vector> 00057 void test_int_vector_random_write(Vector& v, bit_vector::size_type times=100000000) 00058 { 00059 typedef bit_vector::size_type size_type; 00060 uint64_t mask; 00061 int_vector<64> rands = get_rnd_positions(20, mask, v.size()); 00062 size_type cnt=0; 00063 write_R_output("int_vector","random write","begin",times,cnt); 00064 for (size_type i=0; i<times; ++i) { 00065 cnt += (v[rands[ i&mask ]] = i); 00066 } 00067 write_R_output("int_vector","random write","end",times,cnt); 00068 } 00069 00070 template<class Vector> 00071 void test_int_vector_sequential_write(Vector& v, bit_vector::size_type times=100000000) 00072 { 00073 typedef bit_vector::size_type size_type; 00074 const uint64_t mask = (1ULL << bit_magic::l1BP(v.size()))-1; 00075 // std::cout<<" mask="<<mask<<std::endl; 00076 size_type cnt=0; 00077 write_R_output("int_vector","seq write","begin",times,cnt); 00078 for (size_type i=0; i<times; ++i) { 00079 cnt += (v[i&mask] = i); 00080 } 00081 write_R_output("int_vector","seq write","end",times,cnt); 00082 } 00083 00085 /* 00086 */ 00087 template<class Rank> 00088 void test_rank_random_access(const Rank& rank, bit_vector::size_type times=20000000) 00089 { 00090 typedef bit_vector::size_type size_type; 00091 uint64_t mask; 00092 int_vector<64> rands = get_rnd_positions(20, mask, rank.size()+1); 00093 size_type cnt=0; 00094 write_R_output("rank","random access","begin",times,cnt); 00095 for (size_type i=0; i<times; ++i) { 00096 cnt += rank.rank(rands[ i&mask ]); 00097 } 00098 write_R_output("rank","random access","end",times,cnt); 00099 } 00100 00102 /* 00103 * param size The size of the bit vector in bits for which to rank_support should be created 00104 */ 00105 template<class Rank> 00106 void test_rank_construction(bit_vector::size_type size=838860800) 00107 { 00108 typedef bit_vector::size_type size_type; 00109 bit_vector b(size); 00110 util::set_random_bits(b, 17); 00111 std::cout<<"# bit vector size of rank construct : "<<size<<std::endl; 00112 write_R_output("rank","construct","begin",1,0); 00113 Rank rs(&b); // construct rank_support data structure 00114 write_R_output("rank","construct","end",1,rs(size)); 00115 } 00116 00117 00118 00120 /* 00121 */ 00122 template<class Select> 00123 void test_select_random_access(const Select& select, bit_vector::size_type times=20000000) 00124 { 00125 typedef bit_vector::size_type size_type; 00126 const int s = 20; 00127 const uint64_t mask = (1<<s)-1; 00128 int_vector<64> rands(1<<s ,0); 00129 util::set_random_bits(rands, 17); 00130 size_type args = util::get_one_bits(*(select.v)); 00131 util::all_elements_mod(rands, args); 00132 for (size_type i=0; i<rands.size(); ++i) 00133 rands[i] = rands[i]+1; 00134 size_type cnt=0; 00135 write_R_output("select","random access","begin",times,cnt); 00136 for (size_type i=0; i<times; ++i) { 00137 cnt += select.select(rands[ i&mask ]); 00138 } 00139 write_R_output("select","random access","end",times,cnt); 00140 } 00141 00143 /* 00144 */ 00145 template<class Select> 00146 void test_select_random_access(const Select& select, bit_vector::size_type args, bit_vector::size_type times) 00147 { 00148 typedef bit_vector::size_type size_type; 00149 const int s = 20; 00150 const uint64_t mask = (1<<s)-1; 00151 int_vector<64> rands(1<<s ,0); 00152 util::set_random_bits(rands, 17); 00153 util::all_elements_mod(rands, args); 00154 for (size_type i=0; i<rands.size(); ++i) 00155 rands[i] = rands[i]+1; 00156 size_type cnt=0; 00157 write_R_output("select","random access","begin",times,cnt); 00158 for (size_type i=0; i<times; ++i) { 00159 cnt += select.select(rands[ i&mask ]); 00160 } 00161 write_R_output("select","random access","end",times,cnt); 00162 } 00163 00165 /* 00166 * param size The size of the bit vector in bits for which to rank_support should be created 00167 */ 00168 template<class Select> 00169 void test_select_construction(bit_vector::size_type size=838860800) 00170 { 00171 typedef bit_vector::size_type size_type; 00172 bit_vector b(size); 00173 util::set_random_bits(b, 17); 00174 std::cout<<"# bit vector size of select construct : "<<size<<std::endl; 00175 write_R_output("select","construct","begin",1,0); 00176 Select sls(&b); // construct rank_support data structure 00177 write_R_output("select","construct","end",1, sls(1)); 00178 std::cout<<"# size of the select_support_mcl : "<< ((double)util::get_size_in_bytes(sls))/util::get_size_in_bytes(b) <<std::endl; 00179 } 00180 00181 template<class Csa> 00182 void test_csa_access(const Csa& csa, typename Csa::size_type times=1000000) 00183 { 00184 typedef typename Csa::size_type size_type; 00185 size_type cnt=0; 00186 uint64_t mask; 00187 int_vector<64> rands = get_rnd_positions(20, mask, csa.size()); 00188 write_R_output("csa","csa[]","begin",times,cnt); 00189 for (size_type i=0; i<times; ++i) { 00190 cnt += csa[ rands[ i&mask ] ]; 00191 } 00192 write_R_output("csa","csa[]","end",times,cnt); 00193 } 00194 00195 template<class Csa> 00196 void test_icsa_access(const Csa& csa, typename Csa::size_type times=1000000) 00197 { 00198 typedef typename Csa::size_type size_type; 00199 size_type cnt=0; 00200 uint64_t mask; 00201 int_vector<64> rands = get_rnd_positions(20, mask, csa.size()); 00202 write_R_output("csa","csa()","begin",times,cnt); 00203 for (size_type i=0; i<times; ++i) { 00204 cnt += csa(rands[ i&mask ]); 00205 } 00206 write_R_output("csa","csa()","end",times,cnt); 00207 } 00208 00209 // Test random access on \f$\Psi\f$ function 00210 template<class Csa> 00211 void test_psi_access(const Csa& csa, typename Csa::size_type times=1000000) 00212 { 00213 typedef typename Csa::size_type size_type; 00214 size_type cnt=0; 00215 uint64_t mask; 00216 int_vector<64> rands = get_rnd_positions(20, mask, csa.size()); 00217 write_R_output("csa","psi[]","begin",times,cnt); 00218 for (size_type i=0; i<times; ++i) { 00219 cnt += csa.psi[rands[ i&mask ]]; 00220 } 00221 write_R_output("csa","psi[]","end",times,cnt); 00222 } 00223 00225 template<class Csa> 00226 void test_lf_access(const Csa& csa, typename Csa::size_type times=1000000) 00227 { 00228 typedef typename Csa::size_type size_type; 00229 size_type cnt=0; 00230 uint64_t mask; 00231 int_vector<64> rands = get_rnd_positions(20, mask, csa.size()); 00232 write_R_output("csa","psi()","begin",times,cnt); 00233 for (size_type i=0; i<times; ++i) { 00234 cnt += csa.psi(rands[ i&mask ]); 00235 } 00236 write_R_output("csa","psi()","end",times,cnt); 00237 } 00238 00240 template<class Csa> 00241 void test_bwt_access(const Csa& csa, typename Csa::size_type times=1000000) 00242 { 00243 typedef typename Csa::size_type size_type; 00244 uint64_t mask; 00245 int_vector<64> rands = get_rnd_positions(20, mask, csa.size()); 00246 size_type cnt=0; 00247 write_R_output("csa","bwt[]","begin",times,cnt); 00248 for (size_type i=0; i<times; ++i) { 00249 cnt += csa.bwt[ rands[ i&mask ] ]; 00250 } 00251 write_R_output("csa","bwt[]","end",times,cnt); 00252 } 00253 00255 00260 template<class Csa> 00261 void test_pattern_matching(const Csa& csa, 00262 const char* file_name, 00263 const typename Csa::size_type pattern_len=20, 00264 typename Csa::size_type times=1000000) 00265 { 00266 typedef typename Csa::size_type size_type; 00267 uint64_t mask; 00268 int_vector<64> rands = get_rnd_positions(15, mask, csa.size()-pattern_len); 00269 unsigned char* patterns = new unsigned char[rands.size()*pattern_len + 2]; 00270 write_R_output("csa","extract patterns","begin",times,0); 00271 00272 size_type file_size = 0; 00273 char* ccc = NULL; 00274 if ((file_size=file::read_text(file_name, ccc)) == 0) { 00275 throw std::logic_error("file " + std::string(file_name) + " has size 0 or could not be read"); 00276 } 00277 00278 for (size_type i=0; i<rands.size(); ++i) { 00279 for (size_type j=rands[i]; j < rands[i] + pattern_len; ++j) 00280 patterns[i*pattern_len + (j-rands[i])] = ccc[j]; 00281 } 00282 delete [] ccc; 00283 write_R_output("csa","extract patterns","end",times,0); 00284 00285 size_type cnt=0; 00286 write_R_output("csa","pattern_matching","begin",times,cnt); 00287 for (size_type i=0, l_res=0, r_res=0; i<times; ++i) { 00288 cnt += algorithm::backward_search(csa, 0, csa.size(), 00289 (patterns + ((i&mask)*pattern_len)), 00290 pattern_len, l_res, r_res); 00291 } 00292 write_R_output("csa","pattern_matching","end",times,cnt); 00293 delete [] patterns; 00294 } 00295 00296 00297 template<class Csa> 00298 void test_rank_bwt_access(const Csa& csa, typename Csa::size_type times=1000000) 00299 { 00300 typedef typename Csa::size_type size_type; 00301 size_type cnt=0; 00302 // precalc queries, distribution of chars is important! 00303 uint64_t mask, s=20; 00304 int_vector<64> rands = get_rnd_positions(s, mask, csa.size()+1); 00305 unsigned char* c_rands = new unsigned char[1<<s]; 00306 for (size_type i=0; i<rands.size(); ++i) 00307 c_rands[i] = csa.bwt[ rands[i] ]; 00308 00309 write_R_output("csa","rank_bwt","begin",times,cnt); 00310 for (size_type i=0; i<times; ++i) { 00311 cnt += csa.rank_bwt(rands[ i&mask ], c_rands[ i&mask ]); 00312 } 00313 write_R_output("csa","rank_bwt","end",times,cnt); 00314 delete [] c_rands; 00315 } 00316 00317 template<class Csa> 00318 void test_select_bwt_access(const Csa& csa, typename Csa::size_type times=500000) 00319 { 00320 typedef typename Csa::size_type size_type; 00321 size_type cnt=0; 00322 00323 // precalc queries, distribution of chars is important! 00324 uint64_t mask, s=20; 00325 int_vector<64> rands = get_rnd_positions(s, mask, csa.size()); 00326 unsigned char* c_rands = new unsigned char[1<<s]; 00327 for (size_type i=0; i<rands.size(); ++i) { 00328 c_rands[i] = csa.bwt[ rands[i] ]; 00329 rands[i] = csa.rank_bwt(rands[i]+1, c_rands[i]); 00330 } 00331 write_R_output("csa","select_bwt","begin",times,cnt); 00332 for (size_type i=0; i<times; ++i) { 00333 // size_type c = i % csa.sigma; 00334 // cnt += csa.select_bwt( (i % (csa.C[c+1]-csa.C[c])) + 1, csa.comp2char[c]); 00335 cnt += csa.select_bwt(rands[ i&mask ], c_rands[ i&mask]); 00336 } 00337 write_R_output("csa","select_bwt","end",times,cnt); 00338 } 00339 00341 00348 template<class Cst> 00349 void test_cst_dfs_iterator(Cst& cst, typename Cst::size_type times=100000) 00350 { 00351 if (times > cst.nodes()) 00352 times = cst.nodes(); 00353 typedef typename Cst::size_type size_type; 00354 size_type cnt=0; 00355 { 00356 // calc values for cnt 00357 typename Cst::const_iterator it = cst.begin(); 00358 for (size_type i=0; i < std::min(times,(size_type)1000); ++i, ++it) { 00359 cnt += cst.depth(*it); 00360 } 00361 } 00362 write_R_output("cst","dfs","begin",times,cnt); 00363 typename Cst::const_iterator it = cst.begin(); 00364 for (size_type i=0; i<times; ++i) { 00365 ++it; 00366 } 00367 write_R_output("cst", "dfs", "end", times, cnt + cst.depth(*it)); 00368 } 00369 00371 00378 template<class Cst> 00379 void test_cst_dfs_iterator_and_depth(Cst& cst, typename Cst::size_type times=1000000, bool output=false) 00380 { 00381 if (times > 2*cst.nodes()-cst.size()) 00382 times = 2*cst.nodes()-cst.size(); 00383 typedef typename Cst::size_type size_type; 00384 size_type cnt=0; 00385 write_R_output("cst","dfs and depth","begin",times,cnt); 00386 typename Cst::const_iterator it = cst.begin(); 00387 if (!output) { 00388 for (size_type i=0; i<times; ++i, ++it) { 00389 if (!cst.is_leaf(*it)) 00390 cnt += cst.depth(*it); 00391 } 00392 } else { 00393 for (size_type i=0; i<times; ++i, ++it) { 00394 if (!cst.is_leaf(*it)) { 00395 size_type d = cst.depth(*it); 00396 std::cerr << d << "-[" << cst.lb(*it) << "," << cst.rb(*it) << "] "; 00397 if (d < 60) { 00398 for (int i=1; i<=d; ++i) 00399 std::cerr<< cst.edge(*it, i); 00400 } 00401 std::cerr << std::endl; 00402 cnt += d; 00403 } 00404 } 00405 } 00406 write_R_output("cst","dfs and depth","end",times,cnt); 00407 } 00408 00410 00417 template<class Cst> 00418 void test_cst_dfs_iterator_and_id(Cst& cst, typename Cst::size_type times=1000000, bool output=false) 00419 { 00420 if (times > 2*cst.nodes()-cst.size()) 00421 times = 2*cst.nodes()-cst.size(); 00422 typedef typename Cst::size_type size_type; 00423 size_type cnt=0; 00424 write_R_output("cst","dfs and id","begin",times,cnt); 00425 typename Cst::const_iterator it = cst.begin(); 00426 if (!output) { 00427 for (size_type i=0; i<times; ++i, ++it) { 00428 cnt += cst.id(*it); 00429 } 00430 } else { 00431 for (size_type i=0; i<times; ++i, ++it) { 00432 size_type id = cst.id(*it); 00433 std::cerr << id << std::endl; 00434 cnt += id; 00435 } 00436 } 00437 write_R_output("cst","dfs and id","end",times,cnt); 00438 } 00439 00440 00441 00442 // TODO: bottom up iterator 00443 00445 template<class Lcp> 00446 void test_lcp_random_access(Lcp& lcp, typename Lcp::size_type times=10000000) 00447 { 00448 typedef typename Lcp::size_type size_type; 00449 size_type n = lcp.size(); 00450 if (times > n) 00451 times = n; 00452 uint64_t mask; 00453 int_vector<64> rands = get_rnd_positions(20, mask, n); 00454 size_type cnt=0; 00455 write_R_output("lcp","random access","begin",times,cnt); 00456 for (size_type i=0; i<times; ++i) { 00457 cnt += lcp[ rands[ i&mask ] ]; 00458 } 00459 write_R_output("lcp","random access","end",times,cnt); 00460 } 00461 00463 00468 template<class Lcp> 00469 void test_lcp_sequential_access(Lcp& lcp, typename Lcp::size_type times=10000000) 00470 { 00471 typedef typename Lcp::size_type size_type; 00472 size_type n = lcp.size(); 00473 if (times > n) 00474 times = n; 00475 size_type cnt=0; 00476 write_R_output("lcp","sequential access","begin",times,cnt); 00477 for (size_type i=0; i<times; ++i) { 00478 cnt += lcp[ i ]; 00479 } 00480 write_R_output("lcp","sequential access","end",times,cnt); 00481 } 00482 00483 00485 00491 template<class Lcp> 00492 void test_lcp_random_sequential_access(Lcp& lcp, typename Lcp::size_type times=1000000, typename Lcp::size_type seq_len=64) 00493 { 00494 typedef typename Lcp::size_type size_type; 00495 size_type n = lcp.size(); 00496 if (times > n) 00497 times = n; 00498 if (seq_len >= n) 00499 seq_len = n-1; 00500 const int s = 20; 00501 const uint64_t mask = (1<<s)-1; 00502 int_vector<64> rands(1<<s ,0); 00503 util::set_random_bits(rands, 17); 00504 for (int_vector<64>::size_type i=0; i < rands.size(); ++i) { 00505 rands[i] = rands[i] % n; 00506 if (rands[i] + seq_len >= n) 00507 rands[i] = n - 1 - seq_len; 00508 } 00509 size_type cnt=0; 00510 write_R_output("lcp","random sequential access","begin",times*(seq_len+1),cnt); 00511 for (size_type i=0; i<times; ++i) { 00512 for (size_type j=rands[i&mask], k=0; k<=seq_len; ++k, ++j) { 00513 cnt += lcp[ j ]; 00514 } 00515 } 00516 write_R_output("lcp","random sequential access","end",times*(seq_len+1),cnt); 00517 } 00518 00520 00525 template<class Cst> 00526 void test_cst_parent_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) 00527 { 00528 typedef typename Cst::size_type size_type; 00529 typedef typename Cst::node_type node_type; 00530 00531 srand(x); 00532 size_type n = cst.csa.size(); 00533 // take \f$ time \f$ random leaves 00534 std::vector<node_type> rand_leaf(times); 00535 for (size_type i=0; i<rand_leaf.size(); ++i) { 00536 rand_leaf[i] = cst.ith_leaf(1+ (rand() % n)); 00537 } 00538 00539 node_type p; 00540 size_type cnt=0; 00541 write_R_output("cst","parent","begin",times,cnt); 00542 for (size_type i=0; i<times; ++i, ++cnt) { 00543 p = cst.parent(rand_leaf[i]); 00544 while (p != cst.root()) { 00545 p = cst.parent(p); 00546 ++cnt; 00547 } 00548 } 00549 write_R_output("cst","parent","end",times,cnt); 00550 } 00551 00552 00554 00560 template<class Cst> 00561 void generate_nodes_from_random_leaves(const Cst& cst, typename Cst::size_type times, std::vector<typename Cst::node_type>& nodes, uint64_t x=17) 00562 { 00563 typedef typename Cst::size_type size_type; 00564 typedef typename Cst::node_type node_type; 00565 srand(x); 00566 size_type n = cst.csa.size(); 00567 // generate nodes 00568 for (size_type i=0; i<times; ++i) { 00569 node_type p = cst.ith_leaf(1+ (rand() % n)); 00570 nodes.push_back(p); 00571 while (p != cst.root()) { 00572 p = cst.parent(p); 00573 nodes.push_back(p); 00574 } 00575 } 00576 } 00577 00579 00584 template<class Cst> 00585 void test_cst_child_operation(const Cst& cst, typename Cst::size_type times=5000, uint64_t x=17) 00586 { 00587 typedef typename Cst::size_type size_type; 00588 typedef typename Cst::node_type node_type; 00589 00590 std::vector<node_type> nodes; 00591 generate_nodes_from_random_leaves(cst, times, nodes, x); 00592 // for(size_type i=0; i<20; ++i){ 00593 // std::cout<< cst.lb(nodes[i])<<" "<<cst.rb(nodes[i])<<std::endl; 00594 // } 00595 // choose some chars for the text 00596 unsigned char* letters = new unsigned char[nodes.size()+1]; 00597 for (size_type i=0; i<nodes.size(); ++i) { 00598 letters[i] = cst.csa.bwt[i]; 00599 } 00600 00601 node_type c; // for child node 00602 size_type char_pos=0; 00603 size_type cnt=0; 00604 write_R_output("cst","child","begin",nodes.size(),cnt); 00605 for (size_type i=0; i<nodes.size(); ++i) { 00606 // if(i<20){ 00607 // std::cout<<"i="<<i<<" vl="<<cst.lb(nodes[i])<<" rb="<<cst.rb(nodes[i])<<std::endl; 00608 // std::cout<<cst.csa[cst.lb(nodes[i])]<<" "<<cst.depth(nodes[i])<<std::endl; 00609 // } 00610 c = cst.child(nodes[i], letters[i], char_pos); 00611 if (c==cst.root()) 00612 ++cnt; 00613 } 00614 write_R_output("cst","child","end",nodes.size(),cnt); 00615 delete [] letters; 00616 } 00617 00619 00624 template<class Cst> 00625 void test_cst_1th_child_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17) 00626 { 00627 typedef typename Cst::size_type size_type; 00628 typedef typename Cst::node_type node_type; 00629 00630 std::vector<node_type> nodes; 00631 generate_nodes_from_random_leaves(cst, times, nodes, x); 00632 00633 node_type c; // for 1th_child node 00634 size_type cnt=0; 00635 write_R_output("cst","1th_child","begin",nodes.size(),cnt); 00636 for (size_type i=0; i<nodes.size(); ++i) { 00637 c = cst.ith_child(nodes[i], 1); 00638 if (c==cst.root()) 00639 ++cnt; 00640 } 00641 write_R_output("cst","1th_child","end",nodes.size(),cnt); 00642 } 00643 00645 00650 template<class Cst> 00651 void test_cst_sibling_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) 00652 { 00653 typedef typename Cst::size_type size_type; 00654 typedef typename Cst::node_type node_type; 00655 00656 std::vector<node_type> nodes; 00657 generate_nodes_from_random_leaves(cst, times, nodes, x); 00658 for (size_type i=0; i<nodes.size(); ++i) { 00659 nodes[i] = cst.sibling(nodes[i]); 00660 } 00661 00662 node_type c; // for sibling node 00663 size_type cnt=0; 00664 write_R_output("cst","sibling","begin",nodes.size(),cnt); 00665 for (size_type i=0; i<nodes.size(); ++i) { 00666 c = cst.sibling(nodes[i]); 00667 if (c==cst.root()) 00668 ++cnt; 00669 } 00670 write_R_output("cst","sibling","end",nodes.size(),cnt); 00671 } 00672 00674 template<class Cst> 00675 void test_cst_id_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) 00676 { 00677 typedef typename Cst::size_type size_type; 00678 typedef typename Cst::node_type node_type; 00679 std::vector<node_type> nodes; 00680 generate_nodes_from_random_leaves(cst, times, nodes, x); 00681 00682 size_type cnt = 0; 00683 write_R_output("cst","id","begin",nodes.size(),cnt); 00684 for (size_type i=0; i < nodes.size(); ++i) { 00685 cnt += cst.id(nodes[i]); 00686 } 00687 write_R_output("cst","id","end",nodes.size(),cnt); 00688 } 00689 00691 template<class Cst> 00692 void test_cst_depth_operation(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) 00693 { 00694 typedef typename Cst::size_type size_type; 00695 typedef typename Cst::node_type node_type; 00696 std::vector<node_type> nodes; 00697 generate_nodes_from_random_leaves(cst, times, nodes, x); 00698 00699 size_type cnt = 0; 00700 write_R_output("cst","depth","begin",nodes.size(),cnt); 00701 for (size_type i=0; i < nodes.size(); ++i) { 00702 cnt += cst.depth(nodes[i]); 00703 } 00704 write_R_output("cst","depth","end",nodes.size(),cnt); 00705 } 00706 00707 00709 template<class Cst> 00710 void test_cst_depth_operation_for_inner_nodes(const Cst& cst, typename Cst::size_type times=100000, uint64_t x=17) 00711 { 00712 typedef typename Cst::size_type size_type; 00713 typedef typename Cst::node_type node_type; 00714 std::vector<node_type> nodes; 00715 { 00716 std::vector<node_type> nodes2; 00717 generate_nodes_from_random_leaves(cst, times, nodes2, x); 00718 for (size_type i=0; i<nodes2.size(); ++i) 00719 if (!cst.is_leaf(nodes2[i])) { 00720 nodes.push_back(nodes2[i]); 00721 } 00722 } 00723 size_type cnt = 0; 00724 write_R_output("cst","depth of inner nodes","begin",nodes.size(),cnt); 00725 for (size_type i=0; i < nodes.size(); ++i) { 00726 cnt += cst.depth(nodes[i]); 00727 } 00728 write_R_output("cst","depth of inner nodes","end",nodes.size(),cnt); 00729 } 00730 00732 template<class Cst> 00733 void test_cst_lca_operation(const Cst& cst, typename Cst::size_type times=1000000, uint64_t x=17) 00734 { 00735 typedef typename Cst::size_type size_type; 00736 typedef typename Cst::node_type node_type; 00737 // generate \f$2^{19}\f$ random pairs of leafs 00738 size_type n = cst.csa.size(); 00739 uint64_t mask = (1<<20)-1; 00740 std::vector<node_type> nodes(1<<20); 00741 srand(x); 00742 for (size_type i=0; i < nodes.size(); ++i) { 00743 nodes[i] = cst.ith_leaf(rand()%n + 1); 00744 } 00745 00746 size_type cnt=0; 00747 write_R_output("cst","lca","begin",times,cnt); 00748 for (size_type i=0; i<times; ++i) { 00749 node_type v = cst.lca(nodes[(2*i) & mask], nodes[(2*i+1) & mask]); 00750 if (v == cst.root()) 00751 cnt++; 00752 // if(i<30) 00753 // std::cout<<"lca("<<cst.lb(nodes[(2*i)&mask])<<","<<cst.lb(nodes[(2*i+1)&mask])<<")=("<<cst.lb(v)<<","<<cst.rb(v)<<")"<<std::endl; 00754 } 00755 write_R_output("cst","lca","end",times,cnt); 00756 } 00757 00759 template<class Cst> 00760 void test_cst_sl_operation(const Cst& cst, typename Cst::size_type times=500, uint64_t x=17) 00761 { 00762 typedef typename Cst::size_type size_type; 00763 typedef typename Cst::node_type node_type; 00764 size_type n = cst.csa.size(); 00765 if (times > n) 00766 times = n; 00767 00768 std::vector<node_type> nodes(times); 00769 srand(x); 00770 // take \f$ times \f$ random leaves and calculate each parent 00771 for (size_type i=0; i<times; ++i) { 00772 nodes[i] = cst.parent(cst.ith_leaf(rand()%n + 1)); 00773 } 00774 00775 size_type cnt=0; 00776 times = 0; 00777 write_R_output("cst","sl","begin",0,cnt); 00778 for (size_type i=0; i<nodes.size(); ++i) { 00779 node_type v = nodes[i]; 00780 // std::cout<<"v="<<cst.lb(v)<<" "<<cst.rb(v)<<std::endl; 00781 // size_type d = cst.depth(v); 00782 while (v != cst.root()) { // while v is not the root 00783 ++cnt; 00784 v = cst.sl(v); // follow suffix link 00785 // if( cnt < 30 ){ 00786 // std::cout<< cnt << " " << cst.lb(v) << " " << cst.rb(v) << " " << cst.depth(v) << std::endl; 00787 // } 00788 // size_type d2 = cst.depth(v); 00789 // if( d != d2+1 ){ 00790 // std::cout<<"error at cnt "<<cnt<<" d="<<d<<" d2="<<d2<<std::endl; 00791 // } 00792 // d = d2; 00793 } 00794 } 00795 write_R_output("cst","sl","end",cnt,cnt); 00796 } 00797 00799 00803 template<class Cst> 00804 void test_cst_matching_statistics(const Cst& cst, unsigned char* S2, typename Cst::size_type n2) 00805 { 00806 typedef typename Cst::size_type size_type; 00807 typedef typename Cst::node_type node_type; 00808 00809 size_type cnt = 0; 00810 write_R_output("cst","mstats","begin",n2,cnt); 00811 size_type q = 0; // current match length 00812 size_type p2 = n2-1; // position in S2 00813 size_type i = 0, j = cst.csa.size()-1; // \f$ \epsilon \f$ matches all suffixes of S1 00814 while (p2+1 > 0) { 00815 size_type lb, rb; 00816 // perform backward search on interval \f$ [i,j] \f$ 00817 size_type size = algorithm::backward_search(cst.csa, i, j, S2[p2], lb, rb); 00818 if (size > 0) { 00819 q = q + 1; 00820 i = lb; j = rb; 00821 p2 = p2 - 1; 00822 } else if (i==0 and j == cst.csa.size()) { 00823 p2 = p2 -1; 00824 } else { 00825 // map interval to a node of the cst and calculate parent 00826 node_type p = cst.parent(cst.node(i, j)); 00827 q = cst.depth(p); // update match length 00828 i = cst.lb(p); // update left bound 00829 j = cst.rb(p); // update right bound 00830 } 00831 cnt += q; 00832 } 00833 write_R_output("cst","mstats","end",n2,cnt); 00834 } 00835 00836 // test the speed of find_close at random opening parentheses 00837 template<class Bps> 00838 void test_bps_find_close_and_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) 00839 { 00840 typedef bit_vector::size_type size_type; 00841 uint64_t mask; 00842 // uint64_t n = bps.size(); 00843 int_vector<64> rands = get_rnd_positions(20, mask, bps.size()); 00844 for (size_type i=0; i<rands.size(); ++i) { 00845 if (!b[rands[i]]) { // if there is no opening parentheses at position rands[i] 00846 rands[i] = bps.find_open(rands[i]); 00847 } 00848 } 00849 uint64_t cnt = 0; 00850 write_R_output("bps","find_close","begin",times, cnt); 00851 for (size_type i=0; i<times; ++i) { 00852 cnt += bps.find_close(rands[i&mask]); 00853 } 00854 write_R_output("bps","find_close","end",times, cnt); 00855 00856 cnt = 0; 00857 write_R_output("bps","enclose rand","begin",times, cnt); 00858 for (size_type i=0; i<times; ++i) { 00859 cnt += bps.enclose(rands[i&mask]); 00860 } 00861 write_R_output("bps","enclose rand","end",times, cnt); 00862 00863 cnt = 0; 00864 size_type cnt2=0; 00865 write_R_output("bps","enclose tree","begin",times, cnt); 00866 for (size_type i=0,enc, size=bps.size(); cnt2 < times; ++i) { 00867 enc = rands[i&mask]; 00868 while ((enc = bps.enclose(enc)) != size) { 00869 cnt += enc; 00870 ++cnt2; 00871 } 00872 ++cnt2; 00873 } 00874 write_R_output("bps","enclose tree","end",times, cnt); 00875 } 00876 00877 // test the speed of find_close at random opening parentheses 00878 template<class Bps> 00879 void test_bps_find_open(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) 00880 { 00881 typedef bit_vector::size_type size_type; 00882 uint64_t mask; 00883 uint64_t n = bps.size(); 00884 int_vector<64> rands = get_rnd_positions(20, mask, n); 00885 for (size_type i=0; i<rands.size(); ++i) { 00886 if (b[rands[i]]) { 00887 rands[i] = bps.find_close(rands[i]); 00888 } 00889 } 00890 uint64_t cnt = 0; 00891 write_R_output("bps","find_open","begin",times, cnt); 00892 for (size_type i=0; i<times; ++i) { 00893 cnt += bps.find_open(rands[i&mask]); 00894 } 00895 write_R_output("bps","find_open","end",times, cnt); 00896 } 00897 00898 // test the speed of the double enclose method for random opening parentheses i and i+1 00899 template<class Bps> 00900 void test_bps_double_enclose(const Bps& bps, const bit_vector& b, uint64_t times=10000000, uint64_t x=17) 00901 { 00902 typedef bit_vector::size_type size_type; 00903 uint64_t mask; 00904 uint64_t n = bps.size(); 00905 int_vector<64> rands = get_rnd_positions(20, mask, bps.size()); 00906 for (size_type i=0; i<rands.size()/2; ++i) { 00907 if (!b[rands[2*i]]) { // if there is no opening parentheses at position rands[i] 00908 uint64_t pos = (rands[2*i]+1)%n; 00909 while (!b[pos] and pos != rands[2*i]) // go forward until we get an opening one 00910 pos = (pos+1) % n; 00911 if (pos + 1000 > rands.size()) 00912 pos = 0; 00913 rands[2*i] = pos; 00914 } 00915 { 00916 uint64_t pos = (rands[2*i]+1)%n; 00917 while (!b[pos] and pos != rands[2*i]) // go forward until we get the next opening one 00918 pos = (pos+1) % n; 00919 rands[2*i+1] = pos; 00920 } 00921 } 00922 uint64_t cnt = 0; 00923 write_R_output("bps","double_enclose","begin",times, cnt); 00924 for (size_type i=0; i<times; ++i) { 00925 cnt += bps.double_enclose(rands[(i*2)&mask], rands[(i*2+1)&mask]); 00926 } 00927 write_R_output("bps","double_enclose","end",times, cnt); 00928 } 00929 00930 00931 }// end namespace sdsl 00932 00933 #endif