SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_BP_SUPPORT_GG 00022 #define INCLUDED_SDSL_BP_SUPPORT_GG 00023 00024 #include "int_vector.hpp" 00025 #include "nearest_neighbour_dictionary.hpp" 00026 #include "rank_support.hpp" 00027 #include "select_support.hpp" 00028 #include "algorithms.hpp" 00029 #include "util.hpp" 00030 #include "testutils.hpp" 00031 #include <stack> 00032 #include <map> 00033 #include <set> 00034 #include <utility> 00035 #include <iostream> 00036 #include <sstream> // for get_info method 00037 #include <fstream> 00038 #include <stdexcept> 00039 #include <sstream> 00040 00041 namespace sdsl 00042 { 00043 00045 00064 template<class NearestNeighbourDictionary = nearest_neighbour_dictionary<30>, 00065 class RankSupport = rank_support_v<>, 00066 class SelectSupport = select_support_mcl<> > 00067 class bp_support_gg 00068 { 00069 public: 00070 typedef bit_vector::size_type size_type; 00071 typedef NearestNeighbourDictionary nnd_type; 00072 typedef RankSupport rank_support_type; 00073 typedef SelectSupport select_support_type; 00074 typedef bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> > bp_support_type; 00075 private: 00076 const bit_vector* m_bp; // the supported balanced parentheses sequence as bit_vector 00077 rank_support_type m_rank_bp; // rank support for the balanced parentheses sequence => see excess() and rank() 00078 select_support_type m_select_bp; // select support for the balanced parentheses sequence => see select() 00079 00080 nnd_type m_nnd; // nearest neighbour dictionary for pioneers bit_vector 00081 00082 bit_vector m_pioneer_bp; // first level of recursion: balanced parentheses sequence of the pioneers 00083 bp_support_type* m_pioneer_bp_support; 00084 00085 uint32_t m_block_size; 00086 size_type m_size; 00087 size_type m_blocks; // number of blocks 00088 00089 void copy(const bp_support_gg& bp_support) { 00090 m_bp = bp_support.m_bp; 00091 m_rank_bp = bp_support.m_rank_bp; 00092 m_rank_bp.set_vector(m_bp); 00093 m_select_bp = bp_support.m_select_bp; 00094 m_select_bp.set_vector(m_bp); 00095 00096 m_nnd = bp_support.m_nnd; 00097 00098 m_block_size = bp_support.m_block_size; 00099 m_size = bp_support.m_size; 00100 m_blocks = bp_support.m_blocks; 00101 00102 00103 m_pioneer_bp = bp_support.m_pioneer_bp; 00104 if (bp_support.m_pioneer_bp_support == NULL) { 00105 if (m_pioneer_bp_support != NULL) 00106 delete m_pioneer_bp_support; 00107 m_pioneer_bp_support = NULL; 00108 } else { 00109 if (m_pioneer_bp_support != NULL) 00110 delete m_pioneer_bp_support; 00111 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >(*(bp_support.m_pioneer_bp_support)); 00112 assert(m_pioneer_bp_support != NULL); 00113 m_pioneer_bp_support->set_vector(&m_pioneer_bp); 00114 } 00115 } 00116 00117 public: 00118 00119 const RankSupport& bp_rank; 00120 const SelectSupport& bp_select; 00121 00122 bp_support_gg():m_bp(NULL), m_pioneer_bp_support(NULL), m_block_size(840), m_size(0), m_blocks(0),bp_rank(m_rank_bp),bp_select(m_select_bp) {} 00123 00125 // TODO: einen Konstruktor ohne das const bei bit_vector, damit bei calculate_pioneers_bitmap_succinct bp ueberschrieben werden kann? 00126 explicit bp_support_gg(const bit_vector* bp, uint32_t used_block_size = 840):m_bp(bp), m_pioneer_bp_support(NULL), m_block_size(used_block_size), m_size(bp==NULL?0:bp->size()), m_blocks((m_size+used_block_size-1)/used_block_size),bp_rank(m_rank_bp),bp_select(m_select_bp) { 00127 if (m_block_size<=2) { 00128 throw std::logic_error(util::demangle(typeid(this).name())+": block_size should be greater than 2!"); 00129 } 00130 if (bp == NULL) { // -> m_bp == NULL 00131 return; 00132 } 00133 util::init_support(m_rank_bp, bp); 00134 util::init_support(m_select_bp, bp); 00135 { 00136 bit_vector pioneer; 00137 // calulate pioneers 00138 // algorithm::calculate_pioneers_bitmap(*m_bp, m_block_size, pioneer); 00139 algorithm::calculate_pioneers_bitmap_succinct(*m_bp, m_block_size, pioneer); 00140 m_nnd = nnd_type(pioneer); 00141 } 00142 00143 m_pioneer_bp.resize(m_nnd.ones()); 00144 if (m_nnd.ones() > 0 and m_nnd.ones() == m_bp->size()) { // m_bp != NULL see above 00145 throw std::logic_error(util::demangle(typeid(this).name())+": recursion in the construction does not terminate!"); 00146 } 00147 00148 for (size_type i=1; i<= m_nnd.ones(); ++i) { // replace this by an iterator!!! see TODO for the nnd data structure 00149 m_pioneer_bp[i-1] = (*m_bp)[m_nnd.select(i)]; 00150 } 00151 00152 if (m_bp->size() > 0) { // m_bp != NULL see above 00153 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >(&m_pioneer_bp, m_block_size); 00154 } 00155 } 00156 00158 bp_support_gg(const bp_support_gg& bp_support):m_pioneer_bp_support(NULL),bp_rank(m_rank_bp),bp_select(m_select_bp) { 00159 copy(bp_support); 00160 } 00161 00163 ~bp_support_gg() { 00164 if (m_pioneer_bp_support != NULL) 00165 delete m_pioneer_bp_support; 00166 } 00167 00169 void swap(bp_support_gg& bp_support) { 00170 m_rank_bp.swap(bp_support.m_rank_bp); 00171 m_select_bp.swap(bp_support.m_select_bp); 00172 m_nnd.swap(bp_support.m_nnd); 00173 00174 std::swap(m_block_size, bp_support.m_block_size); 00175 std::swap(m_size, bp_support.m_size); 00176 std::swap(m_blocks, bp_support.m_blocks); 00177 00178 m_pioneer_bp.swap(bp_support.m_pioneer_bp); 00179 00180 std::swap(m_pioneer_bp_support, bp_support.m_pioneer_bp_support); 00181 if (m_pioneer_bp_support != NULL) { 00182 m_pioneer_bp_support->set_vector(&m_pioneer_bp); 00183 } 00184 if (bp_support.m_pioneer_bp_support != NULL) { 00185 bp_support.m_pioneer_bp_support->set_vector(&(bp_support.m_pioneer_bp)); 00186 } 00187 } 00188 00190 bp_support_gg& operator=(const bp_support_gg& bp_support) { 00191 if (this != &bp_support) { 00192 copy(bp_support); 00193 } 00194 return *this; 00195 } 00196 00197 void set_vector(const bit_vector* bp) { 00198 m_bp = bp; 00199 m_rank_bp.set_vector(bp); 00200 m_select_bp.set_vector(bp); 00201 } 00202 00206 inline size_type excess(size_type i)const { 00207 return (m_rank_bp(i+1)<<1)-i-1; 00208 } 00209 00213 size_type rank(size_type i)const { 00214 return m_rank_bp(i+1); 00215 } 00216 00222 size_type select(size_type i)const { 00223 return m_select_bp(i); 00224 } 00225 00232 size_type find_close(size_type i)const { 00233 #ifdef SDSL_DEBUG_BP 00234 if (i >= m_size) { 00235 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::find_close"); 00236 } 00237 #endif 00238 if (!(*m_bp)[i]) {// if there is a closing parenthesis at index i return i 00239 return i; 00240 } 00241 size_type mi = 0; // match for i 00242 if ((mi=algorithm::near_find_closing(*m_bp, i+1, 1, m_block_size))==i) { 00243 const size_type i_ = m_nnd.rank(i+1)-1; // lemma that this gives us an opening pioneer 00244 assert(m_pioneer_bp[i_]==1); // assert that i2 is an opening parenthesis 00245 size_type mi_ = m_pioneer_bp_support->find_close(i_); assert(m_pioneer_bp[mi_]==0); 00246 mi = m_nnd.select(mi_+1); /* matching pioneer position in bp */ assert((*m_bp)[mi]==0); 00247 mi = (mi/m_block_size)*m_block_size; 00248 // size_type epb = excess(mi); // excess of first parenthesis in the pioneer block 00249 size_type epb2 = excess(mi-1); // excess of first parenthesis in the pioneer block 00250 const size_type ei = excess(i); // excess at position i 00251 /* invariant: epb >= ei-1 */ //assert( epb+1 >= ei ); 00252 /* while( epb+1 != ei ){ assert( mi < m_size ); 00253 if( (*m_bp)[++mi] ) 00254 ++epb; 00255 else 00256 --epb; 00257 } 00258 */ 00259 return algorithm::near_find_closing(*m_bp, mi, epb2-ei+1, m_block_size); 00260 00261 } 00262 return mi; 00263 } 00264 00266 00271 size_type find_open(size_type i)const { 00272 #ifdef SDSL_DEBUG_BP 00273 if (i >= m_size) { 00274 std::stringstream ss; 00275 ss<<"i="<<i<<" m_size="<<m_size<<std::endl; 00276 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::find_open, "+ss.str()); 00277 } 00278 #endif 00279 if ((*m_bp)[i]) {// if there is a opening parenthesis at index i return i 00280 return i; 00281 } 00282 size_type mi = 0; // match for i 00283 // if( (mi=algorithm::near_find_open( *m_bp, i, m_block_size)) == i ){ 00284 if ((mi=algorithm::near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { 00285 //std::cerr<<"no near match found"<<" i="<<i<<" bp[i]="<<(*m_bp)[i]<<std::endl; 00286 const size_type i_ = m_nnd.rank(i); // lemma that this gives us an closing pioneer 00287 assert(m_pioneer_bp[i_]==0); // assert that i' is an opening parenthesis 00288 const size_type mi_ = m_pioneer_bp_support->find_open(i_); assert(m_pioneer_bp[mi_]==1); 00289 mi = m_nnd.select(mi_+1); /* matching pioneer position in bp */ assert((*m_bp)[mi]==1); 00290 mi = (mi/m_block_size)*m_block_size + m_block_size - 1; assert(mi < i); 00291 // size_type epb = excess(mi); // excess of last parenthesis in the pioneer block 00292 size_type epb2 = excess(mi+1); // excess of last parenthesis in the pioneer block 00293 const size_type ei = excess(i); // excess at position i 00294 /*invariant: epb >= ei+1*/ //assert( epb >= ei+1 ); 00295 /* while( epb != ei ){ assert( mi < m_size ); 00296 if( (*m_bp)[mi--] ) 00297 --epb; 00298 else 00299 ++epb; 00300 } 00301 ++mi; 00302 */ 00303 return algorithm::near_find_opening(*m_bp, mi, epb2-ei+1-2*((*m_bp)[mi+1]), m_block_size); 00304 } 00305 return mi; 00306 } 00307 00309 00313 size_type enclose(size_type i)const { 00314 #ifdef SDSL_DEBUG_BP 00315 if (i >= m_size) { 00316 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::enclose."); 00317 } 00318 #endif 00319 if (!(*m_bp)[i]) { // if there is closing parenthesis at position i 00320 return find_open(i); 00321 } 00322 const size_type exi = excess(i); 00323 if (exi == 1) // if i is not enclosed by a parentheses pair.. 00324 return size(); 00325 size_type ei; // enclose for i 00326 // if( (ei=algorithm::near_enclose( *m_bp, i, m_block_size )) == i ){ 00327 if ((ei=algorithm::near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) { 00328 const size_type i_ = m_nnd.rank(i); // next parenthesis in the pioneer bitmap 00329 size_type ei_; // enclose for i' 00330 ei_ = m_pioneer_bp_support->enclose(i_); 00331 assert(m_pioneer_bp[ei_]==1); 00332 ei = m_nnd.select(ei_+1); assert((*m_bp)[ei]==1); 00333 ei = (ei/m_block_size)*m_block_size + m_block_size - 1; assert(ei < i); 00334 // size_type epb = excess(ei); // excess of the last parenthesis in the pioneer block 00335 size_type epb2 = excess(ei+1); // excess of last parenthesis in the pioneer block 00336 /* invariant epb+1 >= exi */ //assert( epb+1 >= exi ); 00337 /* while( epb+2 != exi ){ 00338 if( (*m_bp)[ei--] ) 00339 --epb; 00340 else 00341 ++epb; 00342 } 00343 ++ei; 00344 */ 00345 return algorithm::near_find_opening(*m_bp, ei, epb2-exi+1+2*((*m_bp)[ei+1]==0), m_block_size); 00346 } 00347 return ei; 00348 } 00349 00351 00358 size_type rr_enclose(const size_type i, const size_type j)const { 00359 assert(j < m_size); 00360 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00361 const size_type mip1 = find_close(i)+1; 00362 if (mip1 >= j) 00363 return size(); 00364 return rmq_open(mip1, j); 00365 } 00366 00375 size_type rmq_open(const size_type l, const size_type r)const { 00376 if (l >= r) 00377 return size(); 00378 size_type min_ex_pos = r; 00379 00380 if (l/m_block_size == r/m_block_size) { 00381 min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r); 00382 } else { // parentheses pair does not start in the same block 00383 // muss nicht sein: assert( l>=1 ); // l is at greater or equal than 1 00384 // note: l and r are not in the same block 00385 size_type k, ex; // helper variables 00386 size_type min_ex = excess(r) + 2*((*m_bp)[r]==0);// minimal excess 00387 00388 00389 // 1.2 00390 size_type l_ = m_nnd.rank(l); // l_ inclusive 00391 size_type r_ = m_nnd.rank(r); // r_ exclusive 00392 00393 size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_); 00394 if (min_ex_pos_ < r_) { 00395 k = m_nnd.select(min_ex_pos_ + 1); 00396 min_ex = excess(k); min_ex_pos = k; 00397 } else { 00398 // 1.1 00399 k = algorithm::near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r); 00400 if (k < r) { 00401 assert(excess(k) < min_ex); 00402 min_ex = excess(k); min_ex_pos = k; 00403 } 00404 } 00405 // 1.3 00406 k = algorithm::near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size); 00407 if (k < (l/m_block_size+1)*m_block_size and (ex=excess(k)) < min_ex) { 00408 min_ex = ex; min_ex_pos = k; 00409 } 00410 } 00411 // 1.4 00412 if (min_ex_pos < r) 00413 return min_ex_pos; 00414 else 00415 return size(); 00416 } 00417 /* 00418 size_type _rmq_open(const size_type l, const size_type r)const{ 00419 if(l >= r) 00420 return size(); 00421 size_type min_ex_pos = r; 00422 00423 if( l/m_block_size == r/m_block_size ){ 00424 min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r); 00425 } 00426 else{ // parentheses pair does not start in the same block 00427 // muss nicht sein assert( l>=1 ); // l is at greater or equal than 1 00428 // note: l and r are not in the same block 00429 size_type k, ex; // helper variables 00430 size_type min_ex = excess(r) + 2*((*m_bp)[r]==0);// minimal excess 00431 00432 // 1.3 00433 k = algorithm::near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size ); 00434 if( k < (l/m_block_size+1)*m_block_size ){ 00435 ex = find_close(k); 00436 if( ex >= r ) 00437 return k; 00438 } 00439 00440 // 1.2 00441 size_type l_ = m_nnd.rank( l ); // l_ inclusive 00442 size_type r_ = m_nnd.rank( r ); // r_ exclusive 00443 00444 size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_); 00445 if( min_ex_pos_ < r_ ){ 00446 k = m_nnd.select(min_ex_pos_ + 1 ); 00447 min_ex = excess(k); min_ex_pos = k; 00448 }else{ 00449 // 1.1 00450 k = algorithm::near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r); 00451 if( k < r ){ 00452 assert(excess(k) < min_ex); 00453 min_ex = excess(k); min_ex_pos = k; 00454 } 00455 } 00456 } 00457 // 1.4 00458 if( min_ex_pos < r ) 00459 return min_ex_pos; 00460 else 00461 return size(); 00462 } 00463 */ 00464 00466 00473 size_type rr_enclose_naive(size_type i, size_type j)const { 00474 assert(j > i and j < m_size); 00475 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00476 size_type mi = find_close(i); // matching parenthesis to i 00477 assert(mi > i and mi < j); 00478 assert(find_close(j) > j); 00479 size_type k = enclose(j); 00480 if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j. 00481 return m_size; 00482 size_type kk; 00483 do { 00484 kk = k; 00485 k = enclose(k); 00486 } while (k != m_size and k > mi); 00487 return kk; 00488 } 00489 00491 00495 size_type rmq(size_type l, size_type r)const { 00496 assert(l<=r); 00497 size_type m = rmq_open(l, r+1); 00498 if (m==size()) 00499 return r; 00500 else if (m==l) 00501 return l; 00502 else { // m>l and m<=r 00503 assert(0 == (*m_bp)[m-1]); 00504 return m-1; 00505 } 00506 } 00507 00509 00514 size_type double_enclose(size_type i, size_type j)const { 00515 assert(j > i); 00516 assert((*m_bp)[i]==1 and (*m_bp)[j]==1); 00517 size_type k = rr_enclose(i, j); 00518 if (k == size()) 00519 return enclose(j); 00520 else 00521 return enclose(k); 00522 } 00523 00525 00527 size_type preceding_closing_parentheses(size_type i)const { 00528 assert(i < m_size); 00529 if (!i) return 0; 00530 size_type ones = m_rank_bp(i); 00531 if (ones) { // ones > 0 00532 assert(m_select_bp(ones) < i); 00533 return i - m_select_bp(ones) - 1; 00534 } else { 00535 return i; 00536 } 00537 } 00538 00542 size_type size() const { 00543 return m_size; 00544 } 00545 00547 00551 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00552 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00553 size_type written_bytes = 0; 00554 written_bytes += util::write_member(m_block_size, out, child, "block_size"); 00555 written_bytes += util::write_member(m_size, out, child, "size"); 00556 written_bytes += util::write_member(m_blocks, out, child, "block_cnt"); 00557 00558 written_bytes += m_rank_bp.serialize(out, child, "bp_rank"); 00559 written_bytes += m_select_bp.serialize(out, child, "bp_select"); 00560 written_bytes += m_nnd.serialize(out, child, "nearest_neighbour_dictionary"); 00561 00562 written_bytes += m_pioneer_bp.serialize(out, child, "pioneer_bp"); 00563 if (m_bp != NULL and m_bp->size() > 0) 00564 written_bytes += m_pioneer_bp_support->serialize(out, child, "pioneer_bp_support"); 00565 structure_tree::add_size(child, written_bytes); 00566 return written_bytes; 00567 } 00568 00570 00574 void load(std::istream& in, const bit_vector* bp) { 00575 m_bp = bp; 00576 util::read_member(m_block_size, in); 00577 util::read_member(m_size, in); 00578 util::read_member(m_blocks, in); 00579 00580 m_rank_bp.load(in, m_bp); 00581 m_select_bp.load(in, m_bp); 00582 m_nnd.load(in); 00583 00584 m_pioneer_bp.load(in); 00585 if (m_pioneer_bp_support != NULL) { 00586 delete m_pioneer_bp_support; 00587 m_pioneer_bp_support = NULL; 00588 } 00589 if (m_bp != NULL and m_bp->size() > 0) { 00590 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >(); 00591 m_pioneer_bp_support->load(in, &m_pioneer_bp); 00592 } 00593 } 00594 00595 std::string get_info()const { 00596 std::stringstream ss; 00597 ss<<"number of parentheses: "<<m_bp->size()<<std::endl; 00598 00599 return ss.str(); 00600 } 00601 }; 00602 00603 }// end namespace 00604 00605 00606 00607 00608 #endif