SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_BP_SUPPORT_SADA 00022 #define INCLUDED_SDSL_BP_SUPPORT_SADA 00023 00024 #include "int_vector.hpp" 00025 #include "rank_support.hpp" 00026 #include "select_support.hpp" 00027 #include "algorithms.hpp" 00028 #include "bitmagic.hpp" 00029 #include "fast_cache.hpp" 00030 #include <stack> 00031 #include <map> 00032 #include <set> 00033 #include <utility> 00034 #include <iostream> 00035 #include <sstream> // for get_info method 00036 #include <fstream> 00037 #include <stdexcept> 00038 #ifndef NDEBUG 00039 #include <algorithm> 00040 #endif 00041 00042 namespace sdsl 00043 { 00044 00046 00066 template<uint32_t SmlBlkSize = 256, 00067 uint32_t MedBlkDeg = 32, 00068 class RankSupport = rank_support_v<>, 00069 class SelectSupport = select_support_mcl<> > 00070 class bp_support_sada 00071 { 00072 public: 00073 typedef bit_vector::size_type size_type; 00074 typedef bit_vector::difference_type difference_type; 00075 typedef int_vector<> sml_block_array_type; 00076 typedef int_vector<> med_block_array_type; 00077 private: 00078 const bit_vector* m_bp; // the supported balanced parentheses sequence as bit_vector 00079 RankSupport m_bp_rank; // rank support for the balanced parentheses sequence => see excess() and rank() 00080 SelectSupport m_bp_select; // select support for the balanced parentheses sequence => see select() 00081 00082 sml_block_array_type m_sml_block_min_max; 00083 med_block_array_type m_med_block_min_max; 00084 00085 size_type m_size; // number of supported parentheses 00086 size_type m_sml_blocks; // number of small sized blocks 00087 size_type m_med_blocks; // number of medium sized blocks 00088 size_type m_med_inner_blocks; // number of inner nodes in the min max tree of the medium sized blocks 00089 //#define USE_CACHE 00090 #ifdef USE_CACHE 00091 mutable fast_cache find_close_cache; 00092 mutable fast_cache find_open_cache; 00093 mutable fast_cache select_cache; 00094 #endif 00095 00096 void copy(const bp_support_sada& bp_support) { 00097 m_bp = bp_support.m_bp; 00098 00099 m_bp_rank = bp_support.m_bp_rank; 00100 m_bp_rank.set_vector(m_bp); 00101 m_bp_select = bp_support.m_bp_select; 00102 m_bp_select.set_vector(m_bp); 00103 00104 m_sml_block_min_max = bp_support.m_sml_block_min_max; 00105 m_med_block_min_max = bp_support.m_med_block_min_max; 00106 00107 m_size = bp_support.m_size; 00108 m_sml_blocks = bp_support.m_sml_blocks; 00109 m_med_blocks = bp_support.m_med_blocks; 00110 m_med_inner_blocks = bp_support.m_med_inner_blocks; 00111 } 00112 00113 inline static size_type sml_block_idx(size_type i) { 00114 return i/SmlBlkSize; 00115 } 00116 00117 inline static size_type med_block_idx(size_type i) { 00118 return i/(SmlBlkSize*MedBlkDeg); 00119 } 00120 00121 inline static bool is_root(size_type v) { 00122 return v==0; 00123 } 00124 00125 inline static bool is_left_child(size_type v) { 00126 assert(!is_root(v)); 00127 return v%2; 00128 } 00129 00130 inline static bool is_right_child(size_type v) { 00131 assert(!is_root(v)); 00132 return !(v%2); 00133 } 00134 00135 inline static size_type parent(size_type v) { 00136 assert(!is_root(v)); 00137 return (v-1)/2; 00138 } 00139 00140 inline static size_type left_child(size_type v) { 00141 return 2*v+1; 00142 } 00143 00144 inline static size_type right_child(size_type v) { 00145 return 2*v+2; 00146 } 00147 00148 inline bool node_exists(size_type v)const { 00149 return v < (m_med_inner_blocks + m_med_blocks); 00150 } 00151 00152 inline static size_type right_sibling(size_type v) { 00153 return ++v; 00154 } 00155 00156 inline static size_type left_sibling(size_type v) { 00157 return --v; 00158 } 00159 00160 inline bool is_leaf(size_type v)const { 00161 return v >= m_med_inner_blocks; 00162 } 00163 00164 inline difference_type min_value(size_type v)const { 00165 return m_size-((difference_type)m_med_block_min_max[2*v]); 00166 } 00167 00168 inline difference_type max_value(size_type v)const { 00169 return m_med_block_min_max[2*v+1]-m_size; 00170 } 00171 00172 inline difference_type sml_min_value(size_type sml_block)const { 00173 return (1 - ((difference_type)m_sml_block_min_max[sml_block<<1])); 00174 } 00175 00176 inline difference_type sml_max_value(size_type sml_block)const { 00177 return (difference_type)m_sml_block_min_max[(sml_block<<1)+1] - 1; 00178 } 00179 00180 void print_node(size_type v)const { 00181 std::cout<< "v = "<< v <<" (" << min_value(v) << ", " << max_value(v) << ")" ; 00182 if (is_leaf(v)) { 00183 std::cout<<" ranging from position "<< (v-m_med_inner_blocks)*MedBlkDeg* SmlBlkSize<< " to "<< (v-m_med_inner_blocks+1)*MedBlkDeg* SmlBlkSize-1; 00184 } 00185 std::cout<< std::endl; 00186 } 00187 00189 00194 // if no such parentheses exists size() is returned. 00195 size_type fwd_excess(size_type i, difference_type rel)const { 00196 size_type j; 00197 // (1) search the small block for the answer 00198 if ((j = algorithm::near_fwd_excess(*m_bp, i+1, rel, SmlBlkSize)) > i) { 00199 return j; 00200 } 00201 difference_type desired_excess = excess(i)+rel; 00202 // (2) scan the small blocks of the current median block for an answer 00203 if ((j = fwd_excess_in_med_block(sml_block_idx(i)+1, desired_excess)) != size()) { 00204 return j; 00205 } 00206 // (3) search the min-max tree of the medium blocks for the right med block 00207 if (med_block_idx(i) == m_med_blocks) // if we are already in the last medium block => we are done 00208 return size(); 00209 size_type v = m_med_inner_blocks + med_block_idx(i); 00210 // (3 a) go up the tree 00211 while (!is_root(v)) { 00212 if (is_left_child(v)) { // if the node is a left child 00213 v = right_sibling(v); // choose right sibling 00214 if (min_value(v) <= desired_excess and desired_excess <= max_value(v)) // found solution 00215 break; 00216 } 00217 v = parent(v); // choose parent 00218 } 00219 // (3 b) go down the tree 00220 if (!is_root(v)) { // found solution for the query 00221 while (!is_leaf(v)) { 00222 v = left_child(v); // choose left child 00223 if (!(min_value(v) <= desired_excess and desired_excess <= max_value(v))) { 00224 v = right_sibling(v); // choose right child == right sibling of the left child 00225 assert((min_value(v) <= desired_excess and desired_excess <= max_value(v))); 00226 } 00227 } 00228 return fwd_excess_in_med_block((v-m_med_inner_blocks)*MedBlkDeg, desired_excess); 00229 } 00230 // no solution found 00231 return size(); 00232 } 00233 00235 00240 size_type bwd_excess(size_type i, difference_type rel)const { 00241 size_type j; 00242 if (i == 0) { 00243 return rel == 0 ? -1 : size(); 00244 } 00245 // (1) search the small block for the answer 00246 if ((j = algorithm::near_bwd_excess(*m_bp, i-1, rel, SmlBlkSize)) < i or j == (size_type)-1) { 00247 return j; 00248 } 00249 difference_type desired_excess = excess(i)+rel; 00250 //std::cerr<<"desired_excess="<<desired_excess<<" "<<excess(i)<<std::endl; 00251 // (2) scan the small blocks of the current median block for an answer 00252 if ((j = bwd_excess_in_med_block(sml_block_idx(i)-1, desired_excess)) != size()) { 00253 return j; 00254 } 00255 // (3) search the min-max tree of the medium blocks for the right med block 00256 if (med_block_idx(i) == 0) { // if we are already in the first medium block => we are done 00257 //std::cerr<<"med_block_idx("<<i<<")=0"<<std::endl; 00258 if (desired_excess == 0) 00259 return -1; 00260 return size(); 00261 } 00262 size_type v = m_med_inner_blocks + med_block_idx(i); 00263 // (3 a) go up the tree 00264 while (!is_root(v)) { 00265 if (is_right_child(v)) { // if the node is a right child 00266 v = left_sibling(v); // choose left sibling 00267 if (min_value(v) <= desired_excess and desired_excess <= max_value(v)) // found solution 00268 break; 00269 } 00270 v = parent(v); // choose parent 00271 } 00272 // (3 b) go down the tree 00273 if (!is_root(v)) { // found solution for the query 00274 while (!is_leaf(v)) { 00275 v = right_child(v); // choose child 00276 if (!(min_value(v) <= desired_excess and desired_excess <= max_value(v))) { 00277 v = left_sibling(v); // choose left child == left sibling of the right child 00278 assert((min_value(v) <= desired_excess and desired_excess <= max_value(v))); 00279 } 00280 } 00281 return bwd_excess_in_med_block((v-m_med_inner_blocks)*MedBlkDeg+(MedBlkDeg-1), desired_excess); 00282 } else if (desired_excess == 0) { 00283 return -1; 00284 } 00285 // no solution found 00286 return size(); 00287 } 00288 00290 size_type bwd_excess_in_med_block(size_type sml_block_idx, difference_type desired_excess)const { 00291 // get the first small block in the medium block right to the current med block 00292 size_type first_sml_block_in_med_block = (med_block_idx(sml_block_idx*SmlBlkSize))*MedBlkDeg; 00293 00294 while ((sml_block_idx+1) and sml_block_idx >= first_sml_block_in_med_block) { 00295 difference_type ex = (sml_block_idx == 0) ? 0 : excess(sml_block_idx*SmlBlkSize-1); 00296 difference_type min_ex = ex + (1 - ((difference_type)m_sml_block_min_max[2*sml_block_idx])); 00297 difference_type max_ex = ex + (m_sml_block_min_max[2*sml_block_idx+1] - 1); 00298 00299 if (min_ex <= desired_excess and desired_excess <= max_ex) { 00300 size_type j = algorithm::near_bwd_excess(*m_bp, (sml_block_idx+1)*SmlBlkSize-1, desired_excess-excess((sml_block_idx+1)*SmlBlkSize), SmlBlkSize); 00301 return j; 00302 } 00303 --sml_block_idx; 00304 } 00305 if (sml_block_idx == 0 and desired_excess == 0) 00306 return -1; 00307 return size(); 00308 } 00309 00311 size_type fwd_excess_in_med_block(size_type sml_block_idx, difference_type desired_excess)const { 00312 // get the first small block in the medium block right to the current med block 00313 size_type first_sml_block_nr_in_next_med_block = (med_block_idx(sml_block_idx*SmlBlkSize)+1)*MedBlkDeg; 00314 if (first_sml_block_nr_in_next_med_block > m_sml_blocks) 00315 first_sml_block_nr_in_next_med_block = m_sml_blocks; 00316 00317 assert(sml_block_idx > 0); 00318 while (sml_block_idx < first_sml_block_nr_in_next_med_block) { 00319 difference_type ex = excess(sml_block_idx*SmlBlkSize-1); 00320 difference_type min_ex = ex + (1 - ((difference_type)m_sml_block_min_max[2*sml_block_idx])); 00321 difference_type max_ex = ex + m_sml_block_min_max[2*sml_block_idx+1] - 1; 00322 if (min_ex <= desired_excess and desired_excess <= max_ex) { 00323 size_type j = algorithm::near_fwd_excess(*m_bp, sml_block_idx*SmlBlkSize, desired_excess-ex, SmlBlkSize); 00324 return j; 00325 } 00326 ++sml_block_idx; 00327 } 00328 return size(); 00329 } 00330 00331 public: 00332 const RankSupport& bp_rank; 00333 const SelectSupport& bp_select; 00334 const sml_block_array_type& sml_block_min_max; 00335 const med_block_array_type& med_block_min_max; 00336 00337 bp_support_sada():m_bp(NULL), m_size(0), m_sml_blocks(0), m_med_blocks(0), m_med_inner_blocks(0), 00338 bp_rank(m_bp_rank), bp_select(m_bp_select), sml_block_min_max(m_sml_block_min_max), med_block_min_max(m_med_block_min_max) 00339 {} 00340 00342 explicit bp_support_sada(const bit_vector* bp):m_bp(bp), 00343 m_size(bp==NULL?0:bp->size()), 00344 m_sml_blocks((m_size+SmlBlkSize-1)/SmlBlkSize), 00345 m_med_blocks((m_size+SmlBlkSize*MedBlkDeg-1)/(SmlBlkSize* MedBlkDeg)), 00346 bp_rank(m_bp_rank), 00347 bp_select(m_bp_select), 00348 sml_block_min_max(m_sml_block_min_max), 00349 med_block_min_max(m_med_block_min_max) { 00350 if (SmlBlkSize==0) { 00351 throw std::logic_error(util::demangle(typeid(this).name())+": SmlBlkSize should be greater than 0!"); 00352 } 00353 if (bp == NULL or bp->size()==0) 00354 return; 00355 // initialize rank and select 00356 util::init_support(m_bp_rank, bp); 00357 util::init_support(m_bp_select, bp); 00358 00359 m_med_inner_blocks = 1; 00360 // m_med_inner_blocks = (next power of 2 greater than or equal to m_med_blocks)-1 00361 while (m_med_inner_blocks < m_med_blocks) { 00362 m_med_inner_blocks <<= 1; assert(m_med_inner_blocks!=0); 00363 } 00364 --m_med_inner_blocks; 00365 assert((m_med_inner_blocks == 0) or (m_med_inner_blocks%2==1)); 00366 00367 m_sml_block_min_max = int_vector<>(2*m_sml_blocks, 0, bit_magic::l1BP(SmlBlkSize+2)+1); 00368 m_med_block_min_max = int_vector<>(2*(m_med_blocks+m_med_inner_blocks), 0, bit_magic::l1BP(2*m_size+2)+1); 00369 00370 // calculate min/max excess values of the small blocks and medium blocks 00371 difference_type min_ex = 1, max_ex = -1, curr_rel_ex = 0, curr_abs_ex = 0; 00372 for (size_type i=0; i < m_size; ++i) { 00373 if ((*bp)[i]) 00374 ++curr_rel_ex; 00375 else 00376 --curr_rel_ex; 00377 if (curr_rel_ex > max_ex) max_ex = curr_rel_ex; 00378 if (curr_rel_ex < min_ex) min_ex = curr_rel_ex; 00379 if ((i+1)%SmlBlkSize == 0 or i+1 == m_size) { 00380 size_type sidx = i/SmlBlkSize; 00381 m_sml_block_min_max[2*sidx ] = -(min_ex-1); 00382 m_sml_block_min_max[2*sidx + 1] = max_ex+1; 00383 00384 size_type v = m_med_inner_blocks + sidx/MedBlkDeg; 00385 00386 if ((difference_type)(-(curr_abs_ex + min_ex)+m_size) > ((difference_type)m_med_block_min_max[2*v])) { 00387 assert(curr_abs_ex+min_ex <= min_value(v)); 00388 m_med_block_min_max[2*v] = -(curr_abs_ex + min_ex)+m_size; 00389 } 00390 00391 if ((difference_type)(curr_abs_ex + max_ex + m_size) > (difference_type)m_med_block_min_max[2*v + 1]) 00392 m_med_block_min_max[2*v + 1] = curr_abs_ex + max_ex + m_size; 00393 00394 curr_abs_ex += curr_rel_ex; 00395 min_ex = 1; max_ex = -1; curr_rel_ex = 0; 00396 } 00397 } 00398 00399 for (size_type v = m_med_block_min_max.size()/2 - 1; !is_root(v); --v) { 00400 size_type p = parent(v); 00401 if (min_value(v) < min_value(p)) // update minimum 00402 m_med_block_min_max[2*p] = m_med_block_min_max[2*v]; 00403 if (max_value(v) > max_value(p)) // update maximum 00404 m_med_block_min_max[2*p+1] = m_med_block_min_max[2*v+1]; 00405 } 00406 #ifdef DEBUG_BP_SUPPORT_SADA 00407 for (size_type i=0; i<m_sml_block_min_max.size(); ++i) { 00408 if (i%2==0) 00409 std::cout << (-(difference_type)m_sml_block_min_max[i]) + 1 << std::endl; 00410 else { 00411 std::cout << ((difference_type)m_sml_block_min_max[i]) - 1 << std::endl; 00412 std::cout<<std::endl; 00413 } 00414 } 00415 std::cout<<"m_sml_blocks = "<<m_sml_blocks<<std::endl; std::cout<<"m_med_blocks = "<<m_med_blocks<<std::endl; 00416 std::cout<<"m_med_inner_blocks = "<<m_med_inner_blocks<<std::endl; 00417 for (size_type i=0; i<m_med_block_min_max.size(); ++i) { 00418 if (i%2==0) 00419 std::cout<< (-(difference_type)m_med_block_min_max[i]) + 1 <<" "; 00420 else 00421 std::cout<<((difference_type)m_med_block_min_max[i])-1<<" "; 00422 } 00423 std::cout<<std::endl; 00424 #endif 00425 } 00426 00428 bp_support_sada(const bp_support_sada& bp_support): 00429 bp_rank(m_bp_rank), bp_select(m_bp_select), sml_block_min_max(m_sml_block_min_max), med_block_min_max(m_med_block_min_max) { 00430 copy(bp_support); 00431 } 00432 00434 ~bp_support_sada() { 00435 } 00436 00438 00442 void swap(bp_support_sada& bp_support) { 00443 // m_bp.swap(bp_support.m_bp); use set_vector to set the supported bit_vector 00444 m_bp_rank.swap(bp_support.m_bp_rank); 00445 m_bp_select.swap(bp_support.m_bp_select); 00446 00447 m_sml_block_min_max.swap(bp_support.m_sml_block_min_max); 00448 m_med_block_min_max.swap(bp_support.m_med_block_min_max); 00449 00450 std::swap(m_size, bp_support.m_size); 00451 std::swap(m_sml_blocks, bp_support.m_sml_blocks); 00452 std::swap(m_med_blocks, bp_support.m_med_blocks); 00453 std::swap(m_med_inner_blocks, bp_support.m_med_inner_blocks); 00454 } 00455 00457 bp_support_sada& operator=(const bp_support_sada& bp_support) { 00458 if (this != &bp_support) { 00459 copy(bp_support); 00460 } 00461 return *this; 00462 } 00463 00464 void set_vector(const bit_vector* bp) { 00465 m_bp = bp; 00466 m_bp_rank.set_vector(bp); 00467 m_bp_select.set_vector(bp); 00468 } 00469 00473 inline difference_type excess(size_type i)const { 00474 return (m_bp_rank(i+1)<<1)-i-1; 00475 } 00476 00480 size_type rank(size_type i)const { 00481 return m_bp_rank(i+1); 00482 } 00483 00489 size_type select(size_type i)const { 00490 #ifdef USE_CACHE 00491 size_type a = 0; 00492 if (select_cache.exists(i, a)) { 00493 return a; 00494 } else { 00495 a = m_bp_select(i); 00496 select_cache.write(i, a); 00497 return a; 00498 } 00499 #endif 00500 return m_bp_select(i); 00501 } 00502 00509 size_type find_close(size_type i)const { 00510 #ifdef SDSL_DEBUG_BP 00511 if (i >= m_size) { 00512 throw std::out_of_range("OUT_OF_RANGE: bp_support_sada::find_close"); 00513 } 00514 #endif 00515 if (!(*m_bp)[i]) {// if there is a closing parenthesis at index i return i 00516 return i; 00517 } 00518 #ifdef USE_CACHE 00519 size_type a = 0; 00520 if (find_close_cache.exists(i, a)) { 00521 return a; 00522 } else { 00523 a = fwd_excess(i, -1); 00524 find_close_cache.write(i, a); 00525 return a; 00526 } 00527 #endif 00528 return fwd_excess(i, -1); 00529 } 00530 00532 00537 size_type find_open(size_type i)const { 00538 #ifdef SDSL_DEBUG_BP 00539 if (i >= m_size) { 00540 throw std::out_of_range("OUT_OF_RANGE: bp_support_sada::find_open"); 00541 } 00542 #endif 00543 if ((*m_bp)[i]) {// if there is a opening parenthesis at index i return i 00544 return i; 00545 } 00546 #ifdef USE_CACHE 00547 size_type a = 0; 00548 if (find_open_cache.exists(i, a)) { 00549 return a; 00550 } else { 00551 size_type bwd_ex = bwd_excess(i,0); 00552 if (bwd_ex == size()) 00553 a = size(); 00554 else 00555 a = bwd_ex+1; 00556 find_open_cache.write(i, a); 00557 return a; 00558 } 00559 #endif 00560 size_type bwd_ex = bwd_excess(i,0); 00561 if (bwd_ex == size()) 00562 return size(); 00563 else 00564 return bwd_ex+1; 00565 } 00566 00568 00572 size_type enclose(size_type i)const { 00573 #ifdef SDSL_DEBUG_BP 00574 if (i >= m_size) { 00575 throw std::out_of_range("OUT_OF_RANGE: bp_support_sada::enclose."); 00576 } 00577 #endif 00578 if (!(*m_bp)[i]) { // if there is closing parenthesis at position i 00579 return find_open(i); 00580 } 00581 size_type bwd_ex = bwd_excess(i, -2); 00582 if (bwd_ex == size()) 00583 return size(); 00584 else 00585 return bwd_ex+1; 00586 } 00587 00589 00596 size_type rr_enclose(const size_type i, const size_type j)const { 00597 assert(j < m_size); 00598 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00599 const size_type mip1 = find_close(i)+1; 00600 if (mip1 >= j) 00601 return size(); 00602 return rmq_open(mip1, j); 00603 } 00604 00613 size_type rmq_open(const size_type l, const size_type r)const { 00614 assert(r < m_bp->size()); 00615 if (l >= r) 00616 return size(); 00617 size_type res = rmq(l, r-1); 00618 assert(res>=l and res<=r-1); 00619 if ((*m_bp)[res] == 1) { // The parenthesis with minimal excess is opening 00620 assert(find_close(res) >= r); 00621 return res; 00622 } else { 00623 res = res+1; // go to the next parenthesis to the right 00624 if (res < r) { // The parenthesis with minimal excess if closing and the next opening parenthesis is less than r 00625 assert((*m_bp)[res] == 1); 00626 size_type ec = enclose(res); 00627 if (ec < l or ec == size()) { 00628 assert(find_close(res)>=r); 00629 return res; 00630 } else { 00631 assert(find_close(ec)>=r); 00632 return ec; 00633 } 00634 } else if (res == r) { 00635 size_type ec = enclose(res); // if m_bp[res]==0 => find_open(res), if m_bp[res]==1 => enclose(res) 00636 if (ec >= l) { 00637 assert(excess(ec)==excess(res-1)); 00638 return ec; 00639 } 00640 } 00641 } 00642 return size(); 00643 } 00644 00645 size_type median_block_rmq(size_type l_sblock, size_type r_sblock, bit_vector::difference_type& min_ex)const { 00646 assert(l_sblock <= r_sblock+1); 00647 size_type pos_min_block = (size_type)-1; 00648 difference_type e = 0; 00649 if (l_sblock == 0) { 00650 if (sml_min_value(0) <= min_ex) { 00651 pos_min_block = 0; 00652 min_ex = sml_min_value(0); 00653 } 00654 l_sblock = 1; 00655 } 00656 for (size_type i=l_sblock; i <= r_sblock; ++i) { 00657 if ((e = (excess(i*SmlBlkSize-1) + sml_min_value(i))) <= min_ex) { 00658 pos_min_block = i; 00659 min_ex = e; 00660 } 00661 } 00662 return pos_min_block; 00663 } 00664 00665 00667 00670 size_type rmq(size_type l, size_type r)const { 00671 assert(l<=r); 00672 size_type sbl = sml_block_idx(l); 00673 size_type sbr = sml_block_idx(r); 00674 difference_type min_rel_ex = 0; 00675 if (sbl == sbr) { // if l and r are in the same small block 00676 return algorithm::near_rmq(*m_bp, l, r, min_rel_ex); 00677 } else { 00678 difference_type min_ex = 0; // current minimal excess value 00679 size_type min_pos = 0; // current min pos 00680 enum min_pos_type {POS, SMALL_BLOCK_POS, MEDIUM_BLOCK_POS}; 00681 enum min_pos_type pos_type = POS; // current 00682 min_pos = algorithm::near_rmq(*m_bp, l, (sbl+1)*SmlBlkSize-1, min_rel_ex); // scan the leftmost small block of l 00683 assert(min_pos >= l); 00684 min_ex = excess(l) + min_rel_ex; 00685 00686 size_type mbl = med_block_idx(l); 00687 size_type mbr = med_block_idx(r); assert(mbl <= mbr); 00688 00689 size_type temp = median_block_rmq(sbl+1, std::min((mbl+1)*MedBlkDeg-1, sbr-1), min_ex); // scan the medium block of l 00690 if (temp != (size_type)-1) { 00691 assert(temp*SmlBlkSize >= l and temp*SmlBlkSize <= r); 00692 min_pos = temp; 00693 assert(min_pos >= 0 and min_pos < m_sml_blocks); 00694 pos_type = SMALL_BLOCK_POS; 00695 } 00696 #if 0 00697 // sequential scan the medium blocks 00698 for (size_type v=mbl+1+m_med_inner_blocks; v < mbr + m_med_inner_blocks; ++v) { 00699 assert(is_leaf(v)); 00700 if (min_value(v) <= min_ex) { 00701 min_ex = min_value(v); 00702 min_pos = v; 00703 assert(min_pos-m_med_inner_blocks >= 0 and min_pos < m_med_blocks-m_med_inner_blocks); 00704 pos_type = MEDIUM_BLOCK_POS; 00705 } 00706 } 00707 #else 00708 // tree search in the min max tree of the medium blocks 00709 if (mbr-mbl > 1) { 00710 size_type v = mbl + 1 + m_med_inner_blocks; 00711 size_type rcb = mbl + 1; // rightmost covered block 00712 size_type h = 0; // subtree height 00713 while (rcb < mbr-1) { // go up the tree until the rightmost covered block >= mbr-1 00714 if (min_value(v) <= min_ex) { 00715 min_ex = min_value(v); min_pos = v; pos_type = MEDIUM_BLOCK_POS; 00716 } 00717 if (is_right_child(v)) { // v is a right child 00718 h += 1; 00719 rcb += (1<<h); 00720 v = right_sibling(parent(v)); 00721 } else { // it is a left child 00722 rcb += (1<<h); 00723 h += 1; 00724 v = parent(v); 00725 } 00726 } 00727 if (rcb <= mbr-1 and min_value(v) <= min_ex) { 00728 min_ex = min_value(v); min_pos = v; pos_type = MEDIUM_BLOCK_POS; 00729 } 00730 assert(node_exists(v)); 00731 assert(rcb >= mbr-1); 00732 00733 while (rcb != mbr-1) { // go down the tree until the rightmost covered block = mbr-1 00734 assert(h != (size_type)-1); 00735 if (rcb > mbr-1) { 00736 h = h-1; 00737 rcb = rcb - (1<<h); 00738 v = left_child(v); 00739 } else { // rcb < mbr-1 00740 h = h-1; 00741 rcb = rcb + (1<<h); 00742 v = right_sibling(right_child(v)); 00743 } 00744 if (rcb <= mbr-1 and min_value(v) <= min_ex) { 00745 min_ex = min_value(v); min_pos = v; pos_type = MEDIUM_BLOCK_POS; 00746 } 00747 } 00748 if (pos_type == MEDIUM_BLOCK_POS) { 00749 while (!is_leaf(min_pos)) { 00750 min_pos = right_child(min_pos); 00751 if (!node_exists(min_pos) or min_value(min_pos) > min_ex) 00752 min_pos = left_sibling(min_pos); 00753 } 00754 } 00755 } 00756 #endif 00757 00758 // search in the medium block of r 00759 temp = median_block_rmq(std::max(mbr*MedBlkDeg, sbl+1), sbr-1, min_ex); // scan the medium block of r 00760 if (temp != (size_type)-1) { 00761 assert(temp*SmlBlkSize >= l and temp*SmlBlkSize <= r); 00762 min_pos = temp; 00763 pos_type = SMALL_BLOCK_POS; 00764 } 00765 // search in the small block of r 00766 temp = algorithm::near_rmq(*m_bp, sbr*SmlBlkSize, r, min_rel_ex); // scan the small block of r 00767 if ((excess(sbr*SmlBlkSize) + min_rel_ex) <= min_ex) { // if it contains the minimum return its position 00768 assert(temp>=l and temp<=r); 00769 return temp; 00770 } 00771 // if the found minimum lies in a medium block => find its small block 00772 if (pos_type == MEDIUM_BLOCK_POS) { 00773 min_pos = min_pos - m_med_inner_blocks; 00774 // std::cerr<<"min_ex="<<min_ex<<std::endl; 00775 temp = median_block_rmq(min_pos*MedBlkDeg, (min_pos+1)*MedBlkDeg-1, min_ex); 00776 assert(temp != (size_type)-1); // assert that we find a solution 00777 assert(temp*SmlBlkSize >= l and temp*SmlBlkSize <= r); 00778 min_pos = temp; 00779 pos_type = SMALL_BLOCK_POS; 00780 } 00781 if (pos_type == SMALL_BLOCK_POS) { 00782 min_pos = algorithm::near_rmq(*m_bp, min_pos*SmlBlkSize, (min_pos+1)*SmlBlkSize-1, min_rel_ex); 00783 assert(min_pos >=l and min_pos <= r); 00784 } 00785 return min_pos; 00786 } 00787 } 00788 00790 00797 size_type rr_enclose_naive(size_type i, size_type j)const { 00798 assert(j > i and j < m_size); 00799 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00800 size_type mi = find_close(i); // matching parenthesis to i 00801 assert(mi > i and mi < j); 00802 assert(find_close(j) > j); 00803 size_type k = enclose(j); 00804 if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j. 00805 return m_size; 00806 size_type kk; 00807 do { 00808 kk = k; 00809 k = enclose(k); 00810 } while (k != m_size and k > mi); 00811 return kk; 00812 } 00813 00815 00820 size_type double_enclose(size_type i, size_type j)const { 00821 assert(j > i); 00822 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00823 size_type k = rr_enclose(i, j); 00824 if (k == size()) 00825 return enclose(j); 00826 else 00827 return enclose(k); 00828 } 00829 00831 00833 size_type preceding_closing_parentheses(size_type i)const { 00834 assert(i < m_size); 00835 if (!i) return 0; 00836 size_type ones = m_bp_rank(i); 00837 if (ones) { // ones > 0 00838 assert(m_bp_select(ones) < i); 00839 return i - m_bp_select(ones) - 1; 00840 } else { 00841 return i; 00842 } 00843 } 00844 00848 size_type size() const { 00849 return m_size; 00850 } 00851 00853 00857 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00858 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00859 size_type written_bytes = 0; 00860 written_bytes += util::write_member(m_size, out, child, "size"); 00861 written_bytes += util::write_member(m_sml_blocks, out, child, "sml_block_cnt"); 00862 written_bytes += util::write_member(m_med_blocks, out, child, "med_block_cnt"); 00863 written_bytes += util::write_member(m_med_inner_blocks, out, child, "med_inner_blocks"); 00864 00865 written_bytes += m_bp_rank.serialize(out, child, "bp_rank"); 00866 written_bytes += m_bp_select.serialize(out, child, "bp_select"); 00867 00868 written_bytes += m_sml_block_min_max.serialize(out, child, "sml_blocks"); 00869 written_bytes += m_med_block_min_max.serialize(out, child, "med_blocks"); 00870 00871 structure_tree::add_size(child, written_bytes); 00872 return written_bytes; 00873 } 00874 00876 00880 void load(std::istream& in, const bit_vector* bp) { 00881 m_bp = bp; 00882 util::read_member(m_size, in); 00883 assert(m_size == bp->size()); 00884 util::read_member(m_sml_blocks, in); 00885 util::read_member(m_med_blocks, in); 00886 util::read_member(m_med_inner_blocks, in); 00887 00888 m_bp_rank.load(in, m_bp); 00889 m_bp_select.load(in, m_bp); 00890 00891 m_sml_block_min_max.load(in); 00892 m_med_block_min_max.load(in); 00893 } 00894 00895 std::string get_info()const { 00896 std::stringstream ss; 00897 if (m_bp != NULL) { 00898 ss<<"number of parentheses: "<<m_bp->size()<<std::endl; 00899 } else { 00900 ss<<"bp_support_sada is not yet initialized!"<<std::endl; 00901 } 00902 00903 return ss.str(); 00904 } 00905 }; 00906 00907 }// end namespace 00908 00909 00910 00911 00912 #endif