SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_BP_SUPPORT_G 00022 #define INCLUDED_SDSL_BP_SUPPORT_G 00023 00024 #include "int_vector.hpp" 00025 #include "nearest_neighbour_dictionary.hpp" 00026 #include "rmq_support.hpp" 00027 #include "rank_support.hpp" 00028 #include "select_support.hpp" 00029 #include "algorithms.hpp" 00030 #include "util.hpp" 00031 #include <stack> 00032 #include <map> 00033 #include <set> 00034 #include <utility> 00035 #include <iostream> 00036 #include <sstream> // for get_info method 00037 #include <fstream> 00038 #include <stdexcept> 00039 00040 namespace sdsl 00041 { 00042 00044 00062 template<class NearestNeighbourDictionary = nearest_neighbour_dictionary<30>, 00063 class RankSupport = rank_support_v<>, 00064 class SelectSupport = select_support_mcl<>, 00065 class RangeMaxSupport = range_maximum_support_sparse_table<int_vector<> >::type > 00066 class bp_support_g 00067 { 00068 public: 00069 typedef bit_vector::size_type size_type; 00070 private: 00071 // typedef range_maximum_support_sparse_table<>::type RangeMaxType; 00072 const bit_vector* m_bp; // the supported balanced parentheses sequence as bit_vector 00073 RankSupport m_rank_bp; // rank support for the balanced parentheses sequence => see excess() and rank() 00074 SelectSupport m_select_bp; // select support for the balanced parentheses sequence => see select() 00075 00076 NearestNeighbourDictionary m_nnd; // nearest neighbour dictionary for pioneers bit_vector 00077 00078 bit_vector m_pioneer_bp; // first level of recursion: balanced parentheses sequence of the pioneers 00079 RankSupport m_rank_pioneer_bp;// rank for the balanced parentheses sequence of the pioneers 00080 NearestNeighbourDictionary m_nnd2; // nearest neighbour dictionary for pioneers of pioneers bit_vector 00081 int_vector<> m_match; // 00082 int_vector<> m_enclose; // 00083 RangeMaxSupport m_range_max_match;// range maximum support for m_match 00084 00085 00086 uint32_t m_block_size; 00087 size_type m_size; 00088 size_type m_blocks; // number of blocks 00089 00090 void copy(const bp_support_g& bp_support) { 00091 m_bp = bp_support.m_bp; 00092 m_rank_bp = bp_support.m_rank_bp; 00093 m_rank_bp.set_vector(m_bp); 00094 m_select_bp = bp_support.m_select_bp; 00095 m_select_bp.set_vector(m_bp); 00096 00097 m_nnd = bp_support.m_nnd; 00098 00099 m_pioneer_bp = bp_support.m_pioneer_bp; 00100 m_rank_pioneer_bp = bp_support.m_rank_pioneer_bp; 00101 m_rank_pioneer_bp.set_vector(&m_pioneer_bp); 00102 m_nnd2 = bp_support.m_nnd2; 00103 m_match = bp_support.m_match; 00104 m_enclose = bp_support.m_enclose; 00105 m_range_max_match = bp_support.m_range_max_match; 00106 m_range_max_match.set_vector(&m_match); 00107 00108 m_block_size = bp_support.m_block_size; 00109 m_size = bp_support.m_size; 00110 m_blocks = bp_support.m_blocks; 00111 } 00112 00116 inline size_type excess_pioneer(size_type i)const { 00117 return (m_rank_pioneer_bp(i+1)<<1)-i-1; 00118 } 00119 00120 public: 00121 const RankSupport& bp_rank; 00122 const SelectSupport& bp_select; 00123 00124 00126 explicit bp_support_g(const bit_vector* bp = NULL, uint32_t used_block_size = 840):m_bp(bp), m_block_size(used_block_size), m_size(bp==NULL?0:bp->size()), m_blocks((m_size+used_block_size-1)/used_block_size),bp_rank(m_rank_bp), bp_select(m_select_bp) { 00127 if (m_block_size<=2) { 00128 throw std::logic_error(util::demangle(typeid(this).name())+": block_size should be greater than 2!"); 00129 } 00130 if (bp == NULL) 00131 return; 00132 util::init_support(m_rank_bp, bp); 00133 util::init_support(m_select_bp, bp); 00134 bit_vector pioneer; 00135 // calulate pioneers 00136 algorithm::calculate_pioneers_bitmap(*m_bp, m_block_size, pioneer); 00137 m_nnd = NearestNeighbourDictionary(pioneer); 00138 m_pioneer_bp.resize(m_nnd.ones()); 00139 for (size_type i=1; i<= m_nnd.ones(); ++i) // replace this by an iterator!!! see todo for the nnd data structure 00140 m_pioneer_bp[i-1] = (*m_bp)[m_nnd.select(i)]; 00141 util::init_support(m_rank_pioneer_bp, &m_pioneer_bp); 00142 algorithm::calculate_pioneers_bitmap(m_pioneer_bp, m_block_size, pioneer); 00143 m_nnd2 = NearestNeighbourDictionary(pioneer); 00144 00145 bit_vector pioneer_bp2 = bit_vector(m_nnd2.ones()); 00146 for (size_type i=1; i<= m_nnd2.ones(); ++i) // replace this by an iterator!!! see todo for the nnd data structure 00147 pioneer_bp2[i-1] = m_pioneer_bp[m_nnd2.select(i)]; 00148 algorithm::calculate_matches(pioneer_bp2, m_match); 00149 algorithm::calculate_enclose(pioneer_bp2, m_enclose); 00150 m_range_max_match = RangeMaxSupport(&m_match); 00151 } 00152 00154 bp_support_g(const bp_support_g& bp_support): bp_rank(m_rank_bp), bp_select(m_select_bp) { 00155 copy(bp_support); 00156 } 00157 00159 bp_support_g& operator=(const bp_support_g& bp_support) { 00160 if (this != &bp_support) { 00161 copy(bp_support); 00162 } 00163 return *this; 00164 } 00165 00166 void swap(bp_support_g& bp_support) { 00167 m_rank_bp.swap(bp_support.m_rank_bp); 00168 m_select_bp.swap(bp_support.m_select_bp); 00169 00170 m_nnd.swap(bp_support.m_nnd); 00171 00172 m_pioneer_bp.swap(bp_support.m_pioneer_bp); 00173 util::swap_support(m_rank_pioneer_bp, bp_support.m_rank_pioneer_bp, 00174 &m_pioneer_bp, &(bp_support.m_pioneer_bp)); 00175 00176 m_nnd2.swap(bp_support.m_nnd2); 00177 00178 m_match.swap(bp_support.m_match); 00179 m_enclose.swap(bp_support.m_enclose); 00180 util::swap_support(m_range_max_match, bp_support.m_range_max_match, 00181 &m_match, &(bp_support.m_match)); 00182 00183 std::swap(m_block_size, bp_support.m_block_size); 00184 std::swap(m_size, bp_support.m_size); 00185 std::swap(m_blocks, bp_support.m_blocks); 00186 } 00187 00188 void set_vector(const bit_vector* bp) { 00189 m_bp = bp; 00190 m_rank_bp.set_vector(bp); 00191 m_select_bp.set_vector(bp); 00192 } 00193 00197 inline size_type excess(size_type i)const { 00198 return (m_rank_bp(i+1)<<1)-i-1; 00199 } 00200 00204 size_type rank(size_type i)const { 00205 return m_rank_bp(i+1); 00206 } 00207 00213 size_type select(size_type i)const { 00214 return m_select_bp(i); 00215 } 00216 00223 size_type find_close(size_type i)const { 00224 #ifdef SDSL_DEBUG_BP 00225 if (i >= m_size) { 00226 throw std::out_of_range("OUT_OF_RANGE: bp_support_g::find_close"); 00227 } 00228 #endif 00229 if (!(*m_bp)[i]) {// if there is a closing parenthesis at index i return i 00230 return i; 00231 } 00232 size_type mi = 0; // match for i 00233 if ((mi=algorithm::near_find_close(*m_bp, i, m_block_size))==i) { 00234 const size_type i2 = m_nnd.rank(i+1)-1; // lemma that this gives us an opening pioneer 00235 assert(m_pioneer_bp[i2]==1); // assert that i2 is an opening parenthesis 00236 size_type mi2 = 0; // match for i2 00237 if ((mi2=algorithm::near_find_close(m_pioneer_bp, i2, m_block_size)) == i2) { 00238 const size_type i3 = m_nnd2.rank(i2+1)-1; 00239 const size_type mi3 = m_match[i3]; assert(mi3>i3); // assert that i3 is an opening parenthesis 00240 mi2 = m_nnd2.select(mi3+1); // matching pioneer position in pioneer_bp 00241 mi2 = (mi2/m_block_size)*m_block_size; 00242 size_type epb = excess_pioneer(mi2);// excess of first parenthesis in the pioneer block 00243 const size_type ei = excess_pioneer(i2);// excess of pioneer 00244 /* invariant: epb >= ei-1 */ assert(epb+1 >= ei); 00245 while (epb+1 != ei) { 00246 assert(mi2 < m_pioneer_bp.size()); 00247 if (m_pioneer_bp[++mi2]) 00248 ++epb; 00249 else 00250 --epb; 00251 } 00252 } 00253 mi = m_nnd.select(mi2+1); /* matching pioneer position in bp */ assert((*m_bp)[mi]==0); 00254 mi = (mi/m_block_size)*m_block_size; 00255 size_type epb = excess(mi); // excess of first parenthesis in the pioneer block 00256 const size_type ei = excess(i); // excess at position i 00257 /* invariant: epb >= ei-1 */ assert(epb+1 >= ei); 00258 while (epb+1 != ei) { 00259 assert(mi < m_size); 00260 if ((*m_bp)[++mi]) 00261 ++epb; 00262 else 00263 --epb; 00264 } 00265 } 00266 return mi; 00267 } 00268 00270 00275 size_type find_open(size_type i)const { 00276 #ifdef SDSL_DEBUG_BP 00277 if (i >= m_size) { 00278 throw std::out_of_range("OUT_OF_RANGE: bp_support_g::find_open"); 00279 } 00280 #endif 00281 if ((*m_bp)[i]) {// if there is a opening parenthesis at index i return i 00282 return i; 00283 } 00284 size_type mi = 0; // match for i 00285 if ((mi=algorithm::near_find_open(*m_bp, i, m_block_size)) == i) { 00286 const size_type i2 = m_nnd.rank(i); // lemma that this gives us an closing pioneer 00287 assert(m_pioneer_bp[i2]==0); // assert that i2 is an opening parenthesis 00288 const size_type mi2 = find_open_in_pioneers(i2); assert(m_pioneer_bp[mi2]==1); 00289 mi = m_nnd.select(mi2+1); /* matching pioneer position in bp */ assert((*m_bp)[mi]==1); 00290 mi = (mi/m_block_size)*m_block_size + m_block_size - 1; assert(mi < i); 00291 size_type epb = excess(mi); // excess of last parenthesis in the pioneer block 00292 const size_type ei = excess(i); // excess at position i 00293 /*invariant: epb >= ei+1*/ assert(epb >= ei+1); 00294 while (epb != ei) { 00295 assert(mi < m_size); 00296 if ((*m_bp)[mi--]) 00297 --epb; 00298 else 00299 ++epb; 00300 } 00301 ++mi; 00302 } 00303 return mi; 00304 } 00305 00306 inline size_type find_open_in_pioneers(size_type i)const { 00307 size_type mi = 0; // match for i 00308 if ((mi=algorithm::near_find_open(m_pioneer_bp, i, m_block_size))==i) { 00309 const size_type i3 = m_nnd2.rank(i); 00310 const size_type mi3 = m_match[i3]; assert(mi3<i3); // assert that i3 is an closing parenthesis 00311 mi = m_nnd2.select(mi3+1); // matching pioneer position in pioneer_bp 00312 mi = (mi/m_block_size)*m_block_size + m_block_size - 1; 00313 size_type epb2 = excess_pioneer(mi);// excess of last parenthesis in the pioneer block 00314 const size_type ei = excess_pioneer(i);// excess of pioneer 00315 /* invariant: epb2 >= ei+1 */ assert(epb2 >= ei+1); 00316 while (epb2 != ei) { 00317 assert(mi < m_pioneer_bp.size()); 00318 if (m_pioneer_bp[mi--]) 00319 --epb2; 00320 else 00321 ++epb2; 00322 } 00323 ++mi; 00324 } 00325 return mi; 00326 } 00327 00329 00333 size_type enclose(size_type i)const { 00334 #ifdef SDSL_DEBUG_BP 00335 if (i >= m_size) { 00336 throw std::out_of_range("OUT_OF_RANGE: bp_support_g::enclose."); 00337 } 00338 #endif 00339 if (!(*m_bp)[i]) { // if there is closing parenthesis at position i 00340 return find_open(i); 00341 } 00342 const size_type exi = excess(i); 00343 if (exi == 1) // if i is not enclosed by a parentheses pair.. 00344 return size(); 00345 size_type ei; // enclose for i 00346 if ((ei=algorithm::near_enclose(*m_bp, i, m_block_size))==i) { 00347 const size_type i2 = m_nnd.rank(i); // next parenthesis in the pioneer bitmap 00348 size_type ei2; // enclose for i2 00349 if (m_pioneer_bp[i2]) { // search enclose in the pioneer bp 00350 if ((ei2=algorithm::near_enclose(m_pioneer_bp, i2, m_block_size))==i2) { 00351 const size_type i3 = m_nnd2.rank(i2); // next parenthesis in the pioneer2 bitmap 00352 const size_type ei3 = m_enclose[i3]; assert(ei3<i3); // assert that enclose answer is valid 00353 ei2 = m_nnd2.select(ei3+1); assert(m_pioneer_bp[ei2] == 1); 00354 ei2 = (ei2/m_block_size)*m_block_size + m_block_size - 1; assert(ei2 < i2); 00355 size_type epb2 = excess_pioneer(ei2);// excess of the last parenthesis in the pioneer block 00356 const size_type exi2 = excess_pioneer(i2);// excess 00357 /* invariant epb2+1 >= exi2 */ assert(epb2+1 >= exi2); 00358 while (epb2+2 != exi2) { 00359 if (m_pioneer_bp[ei2--]) 00360 --epb2; 00361 else 00362 ++epb2; 00363 } 00364 ++ei2; 00365 } 00366 // else if( ei2 > i2 ){ 00367 // ei2 = find_open_in_pioneers(ei2); 00368 // } 00369 } else { 00370 // if the next parenthesis in the pioneer bitmap is an closing parenthesis findopen on m_pioneer_bp 00371 ei2 = find_open_in_pioneers(i2); 00372 } 00373 assert(m_pioneer_bp[ei2]==1); 00374 ei = m_nnd.select(ei2+1); assert((*m_bp)[ei]==1); 00375 ei = (ei/m_block_size)*m_block_size + m_block_size - 1; assert(ei < i); 00376 size_type epb = excess(ei); // excess of the last parenthesis in the pioneer block 00377 /* invariant epb+1 >= exi */ assert(epb+1 >= exi); 00378 while (epb+2 != exi) { 00379 if ((*m_bp)[ei--]) 00380 --epb; 00381 else 00382 ++epb; 00383 } 00384 ++ei; 00385 } 00386 // else if( ei > i ){ 00387 // return find_open(ei); 00388 // } 00389 return ei; 00390 } 00391 00393 00400 size_type rr_enclose(const size_type i, const size_type j)const { 00401 assert(j > i and j < m_size); 00402 const size_type mip1 = find_close(i)+1; 00403 if (mip1 >= j) 00404 return size(); 00405 return rmq_open(mip1, j); 00406 } 00407 00421 size_type rmq_open(const size_type l, const size_type r)const { 00422 if (l >= r) 00423 return size(); 00424 size_type min_ex_pos = r; 00425 00426 if (l/m_block_size == r/m_block_size) { 00427 min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r); 00428 } else { // parentheses pair does not start in the same block 00429 // assert( l>1 ); // mi is at greater or equal than 1 00430 // note: mi and r are not in the same block 00431 size_type k, ex; // helper variables 00432 size_type min_ex = excess(r);// + 2*((*m_bp[r])==0); // minimal excess 00433 const size_type bl = (l/m_block_size+1)*m_block_size; // leftmost position of the leftmost block between the blocks of l and r 00434 const size_type br = (r/m_block_size)*m_block_size; // leftmost position of the block of r 00435 00436 00437 // 1.2 00438 size_type l_ = m_nnd.rank(l); // l_ inclusive 00439 size_type r_ = m_nnd.rank(r); // r_ exclusive 00440 00441 if (r_ > l_) { 00442 size_type min_ex_pos_ = r_; 00443 if (l_/m_block_size == r_/m_block_size) { 00444 min_ex_pos_ = algorithm::near_rmq_open(m_pioneer_bp, l_, r_); 00445 } else if (r_ < m_pioneer_bp.size()) { 00446 size_type min_ex_ = excess_pioneer(r_)+2*(m_pioneer_bp[r_]==0); 00447 const size_type bl_ = (l_/m_block_size+1)*m_block_size; 00448 const size_type br_ = (r_/m_block_size)*m_block_size; 00449 00450 // 2.2 00451 size_type l__ = m_nnd2.rank(l_); // l__ inclusive 00452 size_type r__ = m_nnd2.rank(r_); // r__ exclusive 00453 if (r__ > l__) { 00454 size_type max_match = 0; 00455 k = m_range_max_match(l__, r__-1); 00456 max_match = m_match[k]; 00457 if (max_match >= r__) { 00458 k = m_nnd2.select(k+1); 00459 if (k < r_ and (ex=excess_pioneer(k)) < min_ex_) { 00460 min_ex_ = ex; min_ex_pos_ = k; 00461 } 00462 } 00463 } 00464 if (min_ex_pos_ == r_) { 00465 // 2.1 00466 k = algorithm::near_rmq_open(m_pioneer_bp, br_, r_); 00467 if (k < r_ and (ex=excess_pioneer(k)) < min_ex_) { 00468 min_ex_ = ex; min_ex_pos_ = k; 00469 } 00470 } 00471 // 2.3 00472 k = algorithm::near_rmq_open(m_pioneer_bp, l_, bl_); 00473 if (k < bl_ and (ex=excess_pioneer(k)) < min_ex_) { 00474 min_ex_ = ex; min_ex_pos_ = k; 00475 } 00476 } 00477 // 2.4 00478 if (min_ex_pos_ < r_) { 00479 k = m_nnd.select(min_ex_pos_ + 1); 00480 if ((ex=excess(k)) < min_ex) { 00481 min_ex = ex; min_ex_pos = k; 00482 } 00483 } 00484 } 00485 if (min_ex_pos == r) { 00486 // 1.1 00487 k = algorithm::near_rmq_open(*m_bp, br, r); 00488 if (k < r and (ex=excess(k)) < min_ex) { 00489 min_ex = ex; min_ex_pos = k; 00490 } 00491 } 00492 // 1.3 00493 k = algorithm::near_rmq_open(*m_bp, l, bl); 00494 if (k < bl and (ex=excess(k)) < min_ex) { 00495 min_ex = ex; min_ex_pos = k; 00496 } 00497 } 00498 // 1.4 00499 if (min_ex_pos < r) 00500 return min_ex_pos; 00501 else 00502 return size(); 00503 } 00504 00506 00511 size_type rr_enclose_naive(size_type i, size_type j)const { 00512 assert(j > i and j < m_size); 00513 size_type mi = find_close(i); // matching parenthesis to i 00514 assert(mi > i and mi < j); 00515 assert(find_close(j) > j); 00516 size_type k = enclose(j); 00517 if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j. 00518 return m_size; 00519 size_type kk; 00520 do { 00521 kk = k; 00522 k = enclose(k); 00523 } while (k != m_size and k > mi); 00524 return kk; 00525 } 00526 00528 00531 size_type rmq(size_type l, size_type r)const { 00532 assert(l<=r); 00533 size_type m = rmq_open(l, r+1); 00534 if (m==l) 00535 return l; 00536 else { // m>l and m<=r 00537 assert(0 == (*m_bp)[m-1]); 00538 size_type prev_open = m_rank_bp(m); 00539 if (prev_open == 0 or m_select_bp(prev_open) < l) { // if there exists no opening parenthesis to the left of m which is greater or equal than l 00540 return l; 00541 } else { 00542 return m-1; 00543 } 00544 } 00545 } 00546 00548 00553 size_type double_enclose(size_type i, size_type j)const { 00554 assert(j > i); 00555 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00556 size_type k = rr_enclose(i, j); 00557 if (k == size()) 00558 return enclose(j); 00559 else 00560 return enclose(k); 00561 } 00562 00564 00566 size_type preceding_closing_parentheses(size_type i)const { 00567 assert(i < m_size); 00568 if (!i) return 0; 00569 size_type ones = m_rank_bp(i); 00570 if (ones) { // ones > 0 00571 assert(m_select_bp(ones) < i); 00572 return i - m_select_bp(ones) - 1; 00573 } else { 00574 return i; 00575 } 00576 } 00577 00581 size_type size() const { 00582 return m_size; 00583 } 00584 00586 00590 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00591 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00592 size_type written_bytes = 0; 00593 written_bytes += m_rank_bp.serialize(out, child, "bp_rank"); 00594 written_bytes += m_select_bp.serialize(out, child, "bp_select"); 00595 written_bytes += m_nnd.serialize(out, child,"nearest_neighbor_dictionary"); 00596 00597 written_bytes += m_pioneer_bp.serialize(out, child, "pioneer_bp"); 00598 written_bytes += m_rank_pioneer_bp.serialize(out, child, "pioneer_bp_rank"); 00599 written_bytes += m_nnd2.serialize(out, child, "nearest_neighbor_dictionary2"); 00600 written_bytes += m_match.serialize(out, child, "match_answers"); 00601 written_bytes += m_enclose.serialize(out, child, "enclose_answers"); 00602 written_bytes += m_range_max_match.serialize(out, child, "rmq_answers"); 00603 00604 written_bytes += util::write_member(m_block_size, out, child, "block_size"); 00605 written_bytes += util::write_member(m_size, out, child, "size"); 00606 written_bytes += util::write_member(m_blocks, out, child, "block_cnt"); 00607 structure_tree::add_size(child, written_bytes); 00608 return written_bytes; 00609 } 00610 00612 00616 void load(std::istream& in, const bit_vector* bp) { 00617 m_bp = bp; 00618 m_rank_bp.load(in, m_bp); 00619 m_select_bp.load(in, m_bp); 00620 m_nnd.load(in); 00621 00622 m_pioneer_bp.load(in); 00623 m_rank_pioneer_bp.load(in, &m_pioneer_bp); 00624 m_nnd2.load(in); 00625 m_match.load(in); 00626 m_enclose.load(in); 00627 m_range_max_match.load(in, &m_match); 00628 util::read_member(m_block_size, in); 00629 util::read_member(m_size, in); 00630 assert(m_size == bp->size()); 00631 util::read_member(m_blocks, in); 00632 } 00633 00634 std::string get_info()const { 00635 std::stringstream ss; 00636 ss<<"number of parentheses: "<< (m_bp != NULL ? m_bp->size() : 0) <<std::endl; 00637 00638 return ss.str(); 00639 } 00640 }; 00641 00642 }// end namespace 00643 00644 00645 00646 00647 #endif