SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/rrr_helper.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2011 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00022 #ifndef SDSL_RRR_HELPER
00023 #define SDSL_RRR_HELPER
00024 
00025 #include <algorithm> // for next permutation
00026 #include <iostream>
00027 #include "bitmagic.hpp"
00028 #include "uint128_t.hpp"
00029 #include "uint256_t.hpp"
00030 
00031 namespace sdsl
00032 {
00033 
00035 
00037 template<uint16_t log_n>
00038 struct binomial_coefficients_trait {
00039     typedef uint64_t number_type;
00040     static inline uint16_t l1BP(number_type x) {
00041         return bit_magic::l1BP(x);
00042     }
00043 
00045 
00051     template<class bit_vector_type>
00052     static inline number_type get_int(const bit_vector_type& bv,
00053                                       typename bit_vector_type::size_type pos,
00054                                       uint16_t len) {
00055         return bv.get_int(pos, len);
00056     }
00057 
00059 
00065     template<class bit_vector_type>
00066     static void set_int(bit_vector_type& bv, typename bit_vector_type::size_type pos,
00067                         number_type x, uint16_t len) {
00068         bv.set_int(pos, x, len);
00069     }
00070 
00072 
00075     static inline uint16_t popcount(number_type x) {
00076         return bit_magic::b1Cnt(x);
00077     }
00078 };
00079 
00081 template<>
00082 struct binomial_coefficients_trait<7> {
00083     typedef uint128_t number_type;
00084     static inline uint16_t l1BP(number_type x) {
00085         if ((x >> 64)) {
00086             return bit_magic::l1BP(x >> 64) + 64;
00087         } else {
00088             return bit_magic::l1BP(x);
00089         }
00090     }
00091 
00092     template<class bit_vector_type>
00093     static inline number_type get_int(const bit_vector_type& bv,
00094                                       typename bit_vector_type::size_type pos,
00095                                       uint16_t len) {
00096         if (len <= 64) {
00097             return bv.get_int(pos, len);
00098         } else {
00099             return ((((number_type) bv.get_int(pos+64, len-64))<<64) + bv.get_int(pos, 64));
00100         }
00101     }
00102 
00103     template<class bit_vector_type>
00104     static void set_int(bit_vector_type& bv,
00105                         typename bit_vector_type::size_type pos,
00106                         number_type x, uint16_t len) {
00107         if (len <= 64) {
00108             bv.set_int(pos, x, len);
00109         } else {
00110             bv.set_int(pos, (uint64_t)x, 64); bv.set_int(pos+64, x>>64, len-64);
00111         }
00112     }
00113 
00114     static inline uint16_t popcount(number_type x) {
00115         return bit_magic::b1Cnt(x >> 64) + bit_magic::b1Cnt(x);
00116     }
00117 };
00118 
00120 template<>
00121 struct binomial_coefficients_trait<8> {
00122     typedef uint256_t number_type;
00123     static inline uint16_t l1BP(number_type x) {
00124         return x.l1BP();
00125     }
00126 
00127     template<class bit_vector_type>
00128     static inline number_type get_int(const bit_vector_type& bv,
00129                                       typename bit_vector_type::size_type pos,
00130                                       uint16_t len) {
00131         if (len <= 64) {
00132             return number_type(bv.get_int(pos, len));
00133         } else if (len <= 128) {
00134             return number_type(bv.get_int(pos, 64), bv.get_int(pos+64, len-64));
00135         } else if (len <= 192) {
00136             return number_type(bv.get_int(pos, 64), bv.get_int(pos + 64, 64),
00137                                (uint128_t)bv.get_int(pos + 128, len-128));
00138         } else { // > 192
00139             return number_type(bv.get_int(pos, 64), bv.get_int(pos+64, 64),
00140                                (((uint128_t)bv.get_int(pos+192, len-192))<<64) | bv.get_int(pos+128, 64));
00141         }
00142     }
00143 
00144     template<class bit_vector_type>
00145     static void set_int(bit_vector_type& bv,
00146                         typename bit_vector_type::size_type pos,
00147                         number_type x,
00148                         uint16_t len) {
00149         if (len <= 64) {
00150             bv.set_int(pos, x, len);
00151         } else if (len <= 128) {
00152             bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, len-64);
00153         } else if (len <= 192) {
00154             bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, 64);
00155             bv.set_int(pos+128, x>>128, len-128);
00156         } else { // > 192
00157             bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, 64);
00158             bv.set_int(pos+128, x>>128, 64); bv.set_int(pos+192, x>>192, len-192);
00159         }
00160     }
00161 
00162     static inline uint16_t popcount(number_type x) {
00163         return x.popcount();
00164     }
00165 };
00166 
00168 
00174 template<uint16_t n>
00175 class binomial_coefficients
00176 {
00177     public:
00178         static class impl
00179         {
00180             public:
00181                 enum {MAX_LOG = (n>128 ? 8 : (n > 64 ? 7 : 6))};
00182                 typedef binomial_coefficients_trait<MAX_LOG> trait;
00183                 typedef typename trait::number_type number_type;
00184                 static const uint16_t MAX_SIZE = (1 << MAX_LOG);
00185                 number_type table[MAX_SIZE+1][MAX_SIZE+1];      // table for the binomial coefficients
00186                 uint16_t space[MAX_SIZE+1][MAX_SIZE+1];    // for entry i,j \lceil \log( {i \choose j}+1 ) \rceil
00187                 static const uint16_t BINARY_SEARCH_THRESHOLD = n/MAX_LOG;
00188                 number_type L1Mask[MAX_SIZE+1]; // L1Mask[i] contains a word with the i least significant bits set to 1.
00189                 // i.e. L1Mask[0] = 0, L1Mask[1] = 1,...
00190                 number_type O1Mask[MAX_SIZE]; // O1Mask[i] contains a word with
00191                 impl() {
00192                     for (uint16_t k=0; k <= MAX_SIZE; ++k) {
00193                         table[k][k] = 1;    // initialize diagonal
00194                         space[k][k] = 0;
00195                     }
00196                     for (uint16_t k=0; k <= MAX_SIZE; ++k) {
00197                         table[0][k] = 0;    // initialize first row
00198                         space[0][k] = 0;
00199                     }
00200                     for (uint16_t nn=0; nn <= MAX_SIZE; ++nn) {
00201                         table[nn][0] = 1;    // initialize first column
00202                         space[nn][0] = 0;
00203                     }
00204                     for (int nn=1; nn<=MAX_SIZE; ++nn) {
00205                         for (int k=1; k<=MAX_SIZE; ++k) {
00206                             table[nn][k] = table[nn-1][k-1] + table[nn-1][k];
00207                             space[nn][k] = (table[nn][k] == (number_type)1) ? 0 : trait::l1BP(table[nn][k]) + 1;
00208                         }
00209                     }
00210                     L1Mask[0] = 0;
00211                     number_type mask = 1;
00212                     O1Mask[0] = 1;
00213                     for (int i=1; i<=MAX_SIZE; ++i) {
00214                         L1Mask[i] = mask;
00215                         if (i < MAX_SIZE)
00216                             O1Mask[i] = O1Mask[i-1]<<1;
00217                         mask = (mask << 1);
00218                         mask |= (number_type)1;
00219                     }
00220                 }
00221         } data;
00222 };
00223 
00224 template<uint16_t n>
00225 typename binomial_coefficients<n>::impl binomial_coefficients<n>::data;
00226 
00228 
00239 template<uint16_t n>
00240 class rrr_helper
00241 {
00242     public:
00243         typedef binomial_coefficients<n> binomial; 
00244         typedef typename binomial::impl::number_type number_type; 
00245         typedef typename binomial::impl::trait trait; 
00246 
00248         static inline uint16_t space_for_bt(uint16_t i) {
00249             return binomial::data.space[n][i];
00250         }
00251 
00252         template<class bit_vector_type>
00253         static inline number_type decode_btnr(const bit_vector_type& bv,
00254                                               typename bit_vector_type::size_type btnrp, uint16_t btnrlen) {
00255             return trait::get_int(bv, btnrp, btnrlen);
00256         }
00257 
00258         template<class bit_vector_type>
00259         static void set_bt(bit_vector_type& bv, typename bit_vector_type::size_type pos,
00260                            number_type bt, uint16_t space_for_bt) {
00261             trait::set_int(bv, pos, bt, space_for_bt);
00262         }
00263 
00264         template<class bit_vector_type>
00265         static inline uint16_t get_bt(const bit_vector_type& bv, typename bit_vector_type::size_type pos,
00266                                       uint16_t block_size) {
00267             return trait::popcount(trait::get_int(bv, pos, block_size));
00268         }
00269 
00270         static inline number_type bin_to_nr(number_type bin) {
00271             if (bin == (number_type)0 or bin == binomial::data.L1Mask[n]) {  // handle special case
00272                 return 0;
00273             }
00274             number_type nr = 0;
00275             uint16_t  k  = trait::popcount(bin);
00276             uint16_t  nn = n; // size of the block
00277             while (bin != (number_type)0) {
00278                 if (1ULL & bin) {
00279                     nr += binomial::data.table[nn-1][k];
00280                     --k; // go to the case (n-1, k-1)
00281                 }// else go to the case (n-1, k)
00282                 bin = (bin >> 1);
00283                 --nn;
00284             }
00285             return nr;
00286         }
00287 
00289         static inline bool decode_bit(uint16_t k, number_type nr, uint16_t off) {
00290             if (k == n) {  // if n==k, then the encoded block consists only of ones
00291                 return 1;
00292             } else if (k == 0) { // if k==0 then the encoded block consists only of zeros
00293                 return 0;
00294             } else if (k == 1) { // if k==1 then the encoded block contains exactly on set bit at
00295                 return (n-nr-1) == off; // position n-nr-1
00296             }
00297             uint16_t nn = n;
00298             // if k < n \log n, it is better to do a binary search for each of the on bits
00299             if (k < binomial::data.BINARY_SEARCH_THRESHOLD) {
00300                 while (k > 1) {
00301                     uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= binomial::data.table[nn_lb-1][k]
00302                     while (nn_lb < nn_rb) {
00303                         uint16_t nn_mid = (nn_lb + nn_rb) / 2;
00304                         if (nr >= binomial::data.table[nn_mid-1][k]) {
00305                             nn_lb = nn_mid+1;
00306                         } else {
00307                             nn_rb = nn_mid;
00308                         }
00309                     }
00310                     nn = nn_lb-1;
00311                     if (n-nn >= off) {
00312                         return (n-nn) == off;
00313                     }
00314                     nr -= binomial::data.table[nn-1][k];
00315                     --k;
00316                     --nn;
00317                 }
00318             } else { // else do a linear decoding
00319                 int i = 0;
00320                 while (k > 1) {
00321                     if (i > off) {
00322                         return 0;
00323                     }
00324                     if (nr >= binomial::data.table[nn-1][k]) {
00325                         nr -= binomial::data.table[nn-1][k];
00326                         --k;
00327                         if (i == off)
00328                             return 1;
00329                     }
00330                     --nn;
00331                     ++i;
00332                 }
00333             }
00334             return (n-nr-1) == off;
00335         }
00336 
00338         static inline uint16_t decode_popcount(uint16_t k, number_type nr, uint16_t off) {
00339             if (k == n) {  // if n==k, then the encoded block consists only of ones
00340                 return off;   // i.e. the answer is off
00341             } else if (k == 0) { // if k==0, then the encoded block consists only on zeros
00342                 return 0;    // i.e. the result is zero
00343             } else if (k == 1) { // if k==1 then the encoded block contains exactly on set bit at
00344                 return (n-nr-1) < off; // position n-nr-1, and popcount is 1 if off > (n-nr-1).
00345             }
00346             uint16_t result = 0;
00347             uint16_t nn = n;
00348             // if k < n \log n, it is better to do a binary search for each of the on bits
00349             if (k < binomial::data.BINARY_SEARCH_THRESHOLD) {
00350                 while (k > 1) {
00351                     uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= binomial::data.table[nn_lb-1][k]
00352                     while (nn_lb < nn_rb) {
00353                         uint16_t nn_mid = (nn_lb + nn_rb) / 2;
00354                         if (nr >= binomial::data.table[nn_mid-1][k]) {
00355                             nn_lb = nn_mid+1;
00356                         } else {
00357                             nn_rb = nn_mid;
00358                         }
00359                     }
00360                     nn = nn_lb-1;
00361                     if (n-nn >= off) {
00362                         return result;
00363                     }
00364                     ++result;
00365                     nr -= binomial::data.table[nn-1][k];
00366                     --k;
00367                     --nn;
00368                 }
00369             } else {
00370                 int i = 0;
00371                 while (k > 1) {
00372                     if (i >= off) {
00373                         return result;
00374                     }
00375                     if (nr >= binomial::data.table[nn-1][k]) {
00376                         nr -= binomial::data.table[nn-1][k];
00377                         --k;
00378                         ++result;
00379                     }
00380                     --nn;
00381                     ++i;
00382                 }
00383             }
00384             return result + ((n-nr-1) < off);
00385         }
00386 
00389         static inline uint16_t decode_select(uint16_t k, number_type& nr, uint16_t sel) {
00390             if (k == n) {  // if n==k, then the encoded block consists only of ones
00391                 return sel-1;
00392             } else if (k == 1 and sel == 1) {
00393                 return n-nr-1;
00394             }
00395             uint16_t nn = n;
00396             // if k < n \log n, it is better to do a binary search for each of the on bits
00397             if (sel < binomial::data.BINARY_SEARCH_THRESHOLD) {
00398                 while (sel > 0) {
00399                     uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= iii.m_coefficients[nn_lb-1]
00400                     while (nn_lb < nn_rb) {
00401                         uint16_t nn_mid = (nn_lb + nn_rb) / 2;
00402                         if (nr >= binomial::data.table[nn_mid-1][k]) {
00403                             nn_lb = nn_mid+1;
00404                         } else {
00405                             nn_rb = nn_mid;
00406                         }
00407                     }
00408                     nn = nn_lb-1;
00409                     nr -= binomial::data.table[nn-1][k];
00410                     --sel;
00411                     --nn;
00412                     --k;
00413                 }
00414                 return n-nn-1;
00415             } else {
00416                 int i = 0;
00417                 while (sel > 0) {   // TODO: this condition only work if the precondition holds
00418                     if (nr >= binomial::data.table[nn-1][k]) {
00419                         nr -= binomial::data.table[nn-1][k];
00420                         --sel;
00421                         --k;
00422                     }
00423                     --nn;
00424                     ++i;
00425                 }
00426                 return i-1;
00427             }
00428         }
00429 
00432         template<uint8_t pattern, uint8_t len>
00433         static inline uint16_t decode_select_bitpattern(uint16_t k, number_type& nr, uint16_t sel) {
00434             int i = 0;
00435             uint8_t decoded_pattern = 0;
00436             uint8_t decoded_len     = 0;
00437             uint16_t nn = n;
00438             while (sel > 0) {   // TODO: this condition only work if the precondition holds
00439                 decoded_pattern = decoded_pattern<<1;
00440                 ++decoded_len;
00441                 if (nr >= binomial::data.table[nn-1][k]) {
00442                     nr -= binomial::data.table[nn-1][k];
00443                     // a one is decoded
00444                     decoded_pattern |= 1; // add to the pattern
00445                     --k;
00446                 }
00447                 --nn;
00448                 ++i;
00449                 if (decoded_len == len) {  // if decoded pattern length equals len of the searched pattern
00450                     if (decoded_pattern == pattern) {  // and pattern equals the searched pattern
00451                         --sel;
00452                     }
00453                     decoded_pattern = 0; decoded_len = 0; // reset pattern
00454                 }
00455             }
00456             return i-len; // return the starting position of $sel$th occurence of the pattern
00457         }
00458 
00459 };
00460 
00461 } // end namespace
00462 #endif