SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/bp_support_gg.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_BP_SUPPORT_GG
00022 #define INCLUDED_SDSL_BP_SUPPORT_GG
00023 
00024 #include "int_vector.hpp"
00025 #include "nearest_neighbour_dictionary.hpp"
00026 #include "rank_support.hpp"
00027 #include "select_support.hpp"
00028 #include "algorithms.hpp"
00029 #include "util.hpp"
00030 #include "testutils.hpp"
00031 #include <stack>
00032 #include <map>
00033 #include <set>
00034 #include <utility>
00035 #include <iostream>
00036 #include <sstream> // for get_info method
00037 #include <fstream>
00038 #include <stdexcept>
00039 #include <sstream>
00040 
00041 namespace sdsl
00042 {
00043 
00045 
00064 template<class NearestNeighbourDictionary = nearest_neighbour_dictionary<30>,
00065          class RankSupport = rank_support_v<>,
00066          class SelectSupport = select_support_mcl<> >
00067 class bp_support_gg
00068 {
00069     public:
00070         typedef bit_vector::size_type size_type;
00071         typedef NearestNeighbourDictionary nnd_type;
00072         typedef RankSupport                                rank_support_type;
00073         typedef SelectSupport                      select_support_type;
00074         typedef bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> > bp_support_type;
00075     private:
00076         const bit_vector*           m_bp;                         // the supported balanced parentheses sequence as bit_vector
00077         rank_support_type               m_rank_bp;        // rank support for the balanced parentheses sequence => see excess() and rank()
00078         select_support_type             m_select_bp;      // select support for the balanced parentheses sequence => see select()
00079 
00080         nnd_type                                m_nnd;                    // nearest neighbour dictionary for pioneers bit_vector
00081 
00082         bit_vector                              m_pioneer_bp;     // first level of recursion: balanced parentheses sequence of the pioneers
00083         bp_support_type*                m_pioneer_bp_support;
00084 
00085         uint32_t m_block_size;
00086         size_type m_size;
00087         size_type m_blocks; // number of blocks
00088 
00089         void copy(const bp_support_gg& bp_support) {
00090             m_bp = bp_support.m_bp;
00091             m_rank_bp = bp_support.m_rank_bp;
00092             m_rank_bp.set_vector(m_bp);
00093             m_select_bp = bp_support.m_select_bp;
00094             m_select_bp.set_vector(m_bp);
00095 
00096             m_nnd = bp_support.m_nnd;
00097 
00098             m_block_size = bp_support.m_block_size;
00099             m_size = bp_support.m_size;
00100             m_blocks = bp_support.m_blocks;
00101 
00102 
00103             m_pioneer_bp = bp_support.m_pioneer_bp;
00104             if (bp_support.m_pioneer_bp_support == NULL) {
00105                 if (m_pioneer_bp_support != NULL)
00106                     delete m_pioneer_bp_support;
00107                 m_pioneer_bp_support = NULL;
00108             } else {
00109                 if (m_pioneer_bp_support != NULL)
00110                     delete m_pioneer_bp_support;
00111                 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >(*(bp_support.m_pioneer_bp_support));
00112                 assert(m_pioneer_bp_support != NULL);
00113                 m_pioneer_bp_support->set_vector(&m_pioneer_bp);
00114             }
00115         }
00116 
00117     public:
00118 
00119         const RankSupport& bp_rank;
00120         const SelectSupport& bp_select;
00121 
00122         bp_support_gg():m_bp(NULL), m_pioneer_bp_support(NULL), m_block_size(840), m_size(0), m_blocks(0),bp_rank(m_rank_bp),bp_select(m_select_bp) {}
00123 
00125         // TODO: einen Konstruktor ohne das const bei bit_vector, damit bei calculate_pioneers_bitmap_succinct bp ueberschrieben werden kann?
00126         explicit bp_support_gg(const bit_vector* bp, uint32_t used_block_size = 840):m_bp(bp), m_pioneer_bp_support(NULL), m_block_size(used_block_size), m_size(bp==NULL?0:bp->size()), m_blocks((m_size+used_block_size-1)/used_block_size),bp_rank(m_rank_bp),bp_select(m_select_bp) {
00127             if (m_block_size<=2) {
00128                 throw std::logic_error(util::demangle(typeid(this).name())+": block_size should be greater than 2!");
00129             }
00130             if (bp == NULL) { // -> m_bp == NULL
00131                 return;
00132             }
00133             util::init_support(m_rank_bp, bp);
00134             util::init_support(m_select_bp, bp);
00135             {
00136                 bit_vector pioneer;
00137                 // calulate pioneers
00138 //                              algorithm::calculate_pioneers_bitmap(*m_bp, m_block_size, pioneer);
00139                 algorithm::calculate_pioneers_bitmap_succinct(*m_bp, m_block_size, pioneer);
00140                 m_nnd = nnd_type(pioneer);
00141             }
00142 
00143             m_pioneer_bp.resize(m_nnd.ones());
00144             if (m_nnd.ones() > 0  and m_nnd.ones() == m_bp->size()) { // m_bp != NULL see above
00145                 throw std::logic_error(util::demangle(typeid(this).name())+": recursion in the construction does not terminate!");
00146             }
00147 
00148             for (size_type i=1; i<= m_nnd.ones(); ++i) { // replace this by an iterator!!! see TODO for the nnd data structure
00149                 m_pioneer_bp[i-1] = (*m_bp)[m_nnd.select(i)];
00150             }
00151 
00152             if (m_bp->size() > 0) { // m_bp != NULL see above
00153                 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >(&m_pioneer_bp, m_block_size);
00154             }
00155         }
00156 
00158         bp_support_gg(const bp_support_gg& bp_support):m_pioneer_bp_support(NULL),bp_rank(m_rank_bp),bp_select(m_select_bp) {
00159             copy(bp_support);
00160         }
00161 
00163         ~bp_support_gg() {
00164             if (m_pioneer_bp_support != NULL)
00165                 delete m_pioneer_bp_support;
00166         }
00167 
00169         void swap(bp_support_gg& bp_support) {
00170             m_rank_bp.swap(bp_support.m_rank_bp);
00171             m_select_bp.swap(bp_support.m_select_bp);
00172             m_nnd.swap(bp_support.m_nnd);
00173 
00174             std::swap(m_block_size, bp_support.m_block_size);
00175             std::swap(m_size, bp_support.m_size);
00176             std::swap(m_blocks, bp_support.m_blocks);
00177 
00178             m_pioneer_bp.swap(bp_support.m_pioneer_bp);
00179 
00180             std::swap(m_pioneer_bp_support, bp_support.m_pioneer_bp_support);
00181             if (m_pioneer_bp_support != NULL) {
00182                 m_pioneer_bp_support->set_vector(&m_pioneer_bp);
00183             }
00184             if (bp_support.m_pioneer_bp_support != NULL) {
00185                 bp_support.m_pioneer_bp_support->set_vector(&(bp_support.m_pioneer_bp));
00186             }
00187         }
00188 
00190         bp_support_gg& operator=(const bp_support_gg& bp_support) {
00191             if (this != &bp_support) {
00192                 copy(bp_support);
00193             }
00194             return *this;
00195         }
00196 
00197         void set_vector(const bit_vector* bp) {
00198             m_bp = bp;
00199             m_rank_bp.set_vector(bp);
00200             m_select_bp.set_vector(bp);
00201         }
00202 
00206         inline size_type excess(size_type i)const {
00207             return (m_rank_bp(i+1)<<1)-i-1;
00208         }
00209 
00213         size_type rank(size_type i)const {
00214             return m_rank_bp(i+1);
00215         }
00216 
00222         size_type select(size_type i)const {
00223             return m_select_bp(i);
00224         }
00225 
00232         size_type find_close(size_type i)const {
00233 #ifdef SDSL_DEBUG_BP
00234             if (i >= m_size) {
00235                 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::find_close");
00236             }
00237 #endif
00238             if (!(*m_bp)[i]) {// if there is a closing parenthesis at index i return i
00239                 return i;
00240             }
00241             size_type mi = 0; // match for i
00242             if ((mi=algorithm::near_find_closing(*m_bp, i+1, 1, m_block_size))==i) {
00243                 const size_type i_ = m_nnd.rank(i+1)-1; // lemma that this gives us an opening pioneer
00244                 assert(m_pioneer_bp[i_]==1); // assert that i2 is an opening parenthesis
00245                 size_type mi_ = m_pioneer_bp_support->find_close(i_);    assert(m_pioneer_bp[mi_]==0);
00246                 mi = m_nnd.select(mi_+1);  /* matching pioneer position in bp */ assert((*m_bp)[mi]==0);
00247                 mi = (mi/m_block_size)*m_block_size;
00248 //                              size_type epb = excess(mi); // excess of first parenthesis in the pioneer block
00249                 size_type epb2 = excess(mi-1); // excess of first parenthesis in the pioneer block
00250                 const size_type ei = excess(i);  // excess at position i
00251                 /* invariant: epb >= ei-1 */ //assert( epb+1 >= ei );
00252                 /*                              while( epb+1 != ei ){            assert( mi < m_size );
00253                                                         if( (*m_bp)[++mi] )
00254                                                                 ++epb;
00255                                                         else
00256                                                                 --epb;
00257                                                 }
00258                 */
00259                 return algorithm::near_find_closing(*m_bp, mi, epb2-ei+1, m_block_size);
00260 
00261             }
00262             return mi;
00263         }
00264 
00266 
00271         size_type find_open(size_type i)const {
00272 #ifdef SDSL_DEBUG_BP
00273             if (i >= m_size) {
00274                 std::stringstream ss;
00275                 ss<<"i="<<i<<" m_size="<<m_size<<std::endl;
00276                 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::find_open, "+ss.str());
00277             }
00278 #endif
00279             if ((*m_bp)[i]) {// if there is a opening parenthesis at index i return i
00280                 return i;
00281             }
00282             size_type mi = 0; // match for i
00283 //                      if( (mi=algorithm::near_find_open( *m_bp, i, m_block_size)) == i ){
00284             if ((mi=algorithm::near_find_opening(*m_bp, i-1, 1, m_block_size)) == i) {
00285 //std::cerr<<"no near match found"<<" i="<<i<<" bp[i]="<<(*m_bp)[i]<<std::endl;
00286                 const size_type i_ = m_nnd.rank(i); // lemma that this gives us an closing pioneer
00287                 assert(m_pioneer_bp[i_]==0); // assert that i' is an opening parenthesis
00288                 const size_type mi_ = m_pioneer_bp_support->find_open(i_);              assert(m_pioneer_bp[mi_]==1);
00289                 mi = m_nnd.select(mi_+1);  /* matching pioneer position in bp */ assert((*m_bp)[mi]==1);
00290                 mi = (mi/m_block_size)*m_block_size + m_block_size - 1;         assert(mi < i);
00291 //                              size_type epb = excess(mi); // excess of last parenthesis in the pioneer block
00292                 size_type epb2 = excess(mi+1); // excess of last parenthesis in the pioneer block
00293                 const size_type ei = excess(i);  // excess at position i
00294                 /*invariant: epb >= ei+1*/       //assert( epb >= ei+1 );
00295                 /*                              while( epb != ei ){                      assert( mi < m_size );
00296                                                         if( (*m_bp)[mi--] )
00297                                                                 --epb;
00298                                                         else
00299                                                                 ++epb;
00300                                                 }
00301                                                 ++mi;
00302                 */
00303                 return algorithm::near_find_opening(*m_bp, mi, epb2-ei+1-2*((*m_bp)[mi+1]), m_block_size);
00304             }
00305             return mi;
00306         }
00307 
00309 
00313         size_type enclose(size_type i)const {
00314 #ifdef SDSL_DEBUG_BP
00315             if (i >= m_size) {
00316                 throw std::out_of_range("OUT_OF_RANGE: bp_support_gg::enclose.");
00317             }
00318 #endif
00319             if (!(*m_bp)[i]) { // if there is closing parenthesis at position i
00320                 return find_open(i);
00321             }
00322             const size_type exi = excess(i);
00323             if (exi == 1)  // if i is not enclosed by a parentheses pair..
00324                 return size();
00325             size_type ei; // enclose  for i
00326 //                      if( (ei=algorithm::near_enclose( *m_bp, i, m_block_size )) == i ){
00327             if ((ei=algorithm::near_find_opening(*m_bp, i-1, 1,  m_block_size)) == i) {
00328                 const size_type i_ = m_nnd.rank(i); // next parenthesis in the pioneer bitmap
00329                 size_type ei_; // enclose for i'
00330                 ei_ = m_pioneer_bp_support->enclose(i_);
00331                 assert(m_pioneer_bp[ei_]==1);
00332                 ei = m_nnd.select(ei_+1);                                  assert((*m_bp)[ei]==1);
00333                 ei = (ei/m_block_size)*m_block_size + m_block_size - 1;    assert(ei < i);
00334 //                              size_type epb = excess(ei); // excess of the last parenthesis in the pioneer block
00335                 size_type epb2 = excess(ei+1); // excess of last parenthesis in the pioneer block
00336                 /* invariant epb+1 >= exi */ //assert( epb+1 >= exi );
00337                 /*                              while( epb+2 != exi ){
00338                                                         if( (*m_bp)[ei--] )
00339                                                            --epb;
00340                                                         else
00341                                                                 ++epb;
00342                                                 }
00343                                                 ++ei;
00344                                                 */
00345                 return algorithm::near_find_opening(*m_bp, ei, epb2-exi+1+2*((*m_bp)[ei+1]==0), m_block_size);
00346             }
00347             return ei;
00348         }
00349 
00351 
00358         size_type rr_enclose(const size_type i, const size_type j)const {
00359             assert(j < m_size);
00360             assert((*m_bp)[i]==1 and (*m_bp)[j]==1);
00361             const size_type mip1 = find_close(i)+1;
00362             if (mip1 >= j)
00363                 return size();
00364             return rmq_open(mip1, j);
00365         }
00366 
00375         size_type rmq_open(const size_type l, const size_type r)const {
00376             if (l >= r)
00377                 return size();
00378             size_type           min_ex_pos = r;
00379 
00380             if (l/m_block_size == r/m_block_size) {
00381                 min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r);
00382             } else { // parentheses pair does not start in the same block
00383 // muss nicht sein:                             assert( l>=1 ); // l is at greater or equal than 1
00384                 // note: l and r are not in the same block
00385                 size_type               k, ex;  // helper variables
00386                 size_type               min_ex = excess(r) + 2*((*m_bp)[r]==0);// minimal excess
00387 
00388 
00389                 // 1.2
00390                 size_type l_ = m_nnd.rank(l);   //  l_ inclusive
00391                 size_type r_    = m_nnd.rank(r);   // r_ exclusive
00392 
00393                 size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_);
00394                 if (min_ex_pos_ < r_) {
00395                     k = m_nnd.select(min_ex_pos_ + 1);
00396                     min_ex = excess(k); min_ex_pos = k;
00397                 } else {
00398                     // 1.1
00399                     k = algorithm::near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r);
00400                     if (k < r) {
00401                         assert(excess(k) < min_ex);
00402                         min_ex          = excess(k); min_ex_pos         = k;
00403                     }
00404                 }
00405                 // 1.3
00406                 k = algorithm::near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size);
00407                 if (k < (l/m_block_size+1)*m_block_size and (ex=excess(k)) < min_ex) {
00408                     min_ex = ex; min_ex_pos = k;
00409                 }
00410             }
00411             // 1.4
00412             if (min_ex_pos < r)
00413                 return min_ex_pos;
00414             else
00415                 return size();
00416         }
00417         /*
00418                         size_type _rmq_open(const size_type l, const size_type r)const{
00419                                 if(l >= r)
00420                                         return size();
00421                                 size_type               min_ex_pos = r;
00422 
00423                                 if( l/m_block_size == r/m_block_size ){
00424                                         min_ex_pos = algorithm::near_rmq_open(*m_bp, l, r);
00425                                 }
00426                                 else{ // parentheses pair does not start in the same block
00427         // muss nicht sein                              assert( l>=1 ); // l is at greater or equal than 1
00428                                         // note: l and r are not in the same block
00429                                         size_type               k, ex;  // helper variables
00430                                         size_type               min_ex = excess(r) + 2*((*m_bp)[r]==0);// minimal excess
00431 
00432                                         // 1.3
00433                                         k = algorithm::near_rmq_open(*m_bp, l, (l/m_block_size+1)*m_block_size );
00434                                         if( k < (l/m_block_size+1)*m_block_size ){
00435                                                 ex = find_close(k);
00436                                                 if( ex >= r )
00437                                                         return k;
00438                                         }
00439 
00440                                         // 1.2
00441                                         size_type l_ = m_nnd.rank( l ); //  l_ inclusive
00442                                         size_type r_    = m_nnd.rank( r ); // r_ exclusive
00443 
00444                                         size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_);
00445                                         if( min_ex_pos_ < r_ ){
00446                                                 k = m_nnd.select(min_ex_pos_ + 1 );
00447                                                 min_ex = excess(k); min_ex_pos = k;
00448                                         }else{
00449                                                 // 1.1
00450                                                 k = algorithm::near_rmq_open(*m_bp, (r/m_block_size)*m_block_size, r);
00451                                                 if( k < r ){
00452                                                         assert(excess(k) < min_ex);
00453                                                         min_ex          = excess(k); min_ex_pos         = k;
00454                                                 }
00455                                         }
00456                                 }
00457                                 // 1.4
00458                                 if( min_ex_pos < r )
00459                                         return min_ex_pos;
00460                                 else
00461                                         return size();
00462                         }
00463         */
00464 
00466 
00473         size_type rr_enclose_naive(size_type i, size_type j)const {
00474             assert(j > i and j < m_size);
00475             assert((*m_bp)[i]==1 and (*m_bp)[j]==1);
00476             size_type mi = find_close(i); // matching parenthesis to i
00477             assert(mi > i and mi < j);
00478             assert(find_close(j) > j);
00479             size_type k = enclose(j);
00480             if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j.
00481                 return m_size;
00482             size_type kk;
00483             do {
00484                 kk = k;
00485                 k = enclose(k);
00486             } while (k != m_size and k > mi);
00487             return kk;
00488         }
00489 
00491 
00495         size_type rmq(size_type l, size_type r)const {
00496             assert(l<=r);
00497             size_type m = rmq_open(l, r+1);
00498             if (m==size())
00499                 return r;
00500             else if (m==l)
00501                 return l;
00502             else { // m>l and m<=r
00503                 assert(0 == (*m_bp)[m-1]);
00504                 return m-1;
00505             }
00506         }
00507 
00509 
00514         size_type double_enclose(size_type i, size_type j)const {
00515             assert(j > i);
00516             assert((*m_bp)[i]==1 and (*m_bp)[j]==1);
00517             size_type k = rr_enclose(i, j);
00518             if (k == size())
00519                 return enclose(j);
00520             else
00521                 return enclose(k);
00522         }
00523 
00525 
00527         size_type preceding_closing_parentheses(size_type i)const {
00528             assert(i < m_size);
00529             if (!i) return 0;
00530             size_type ones = m_rank_bp(i);
00531             if (ones) { // ones > 0
00532                 assert(m_select_bp(ones) < i);
00533                 return i - m_select_bp(ones) - 1;
00534             } else {
00535                 return i;
00536             }
00537         }
00538 
00542         size_type size() const {
00543             return m_size;
00544         }
00545 
00547 
00551         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00552             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00553             size_type written_bytes = 0;
00554             written_bytes += util::write_member(m_block_size, out, child, "block_size");
00555             written_bytes += util::write_member(m_size, out, child, "size");
00556             written_bytes += util::write_member(m_blocks, out, child, "block_cnt");
00557 
00558             written_bytes += m_rank_bp.serialize(out, child, "bp_rank");
00559             written_bytes += m_select_bp.serialize(out, child, "bp_select");
00560             written_bytes += m_nnd.serialize(out, child, "nearest_neighbour_dictionary");
00561 
00562             written_bytes += m_pioneer_bp.serialize(out, child, "pioneer_bp");
00563             if (m_bp != NULL and m_bp->size() > 0)
00564                 written_bytes += m_pioneer_bp_support->serialize(out, child, "pioneer_bp_support");
00565             structure_tree::add_size(child, written_bytes);
00566             return written_bytes;
00567         }
00568 
00570 
00574         void load(std::istream& in, const bit_vector* bp) {
00575             m_bp = bp;
00576             util::read_member(m_block_size, in);
00577             util::read_member(m_size, in);
00578             util::read_member(m_blocks, in);
00579 
00580             m_rank_bp.load(in, m_bp);
00581             m_select_bp.load(in, m_bp);
00582             m_nnd.load(in);
00583 
00584             m_pioneer_bp.load(in);
00585             if (m_pioneer_bp_support != NULL) {
00586                 delete m_pioneer_bp_support;
00587                 m_pioneer_bp_support = NULL;
00588             }
00589             if (m_bp != NULL and m_bp->size() > 0) {
00590                 m_pioneer_bp_support = new bp_support_gg<nnd_type, RankSupport, select_support_bs<RankSupport> >();
00591                 m_pioneer_bp_support->load(in, &m_pioneer_bp);
00592             }
00593         }
00594 
00595         std::string get_info()const {
00596             std::stringstream ss;
00597             ss<<"number of parentheses: "<<m_bp->size()<<std::endl;
00598 
00599             return ss.str();
00600         }
00601 };
00602 
00603 }// end namespace
00604 
00605 
00606 
00607 
00608 #endif