SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2011 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00022 #ifndef SDSL_RRR_HELPER 00023 #define SDSL_RRR_HELPER 00024 00025 #include <algorithm> // for next permutation 00026 #include <iostream> 00027 #include "bitmagic.hpp" 00028 #include "uint128_t.hpp" 00029 #include "uint256_t.hpp" 00030 00031 namespace sdsl 00032 { 00033 00035 00037 template<uint16_t log_n> 00038 struct binomial_coefficients_trait { 00039 typedef uint64_t number_type; 00040 static inline uint16_t l1BP(number_type x) { 00041 return bit_magic::l1BP(x); 00042 } 00043 00045 00051 template<class bit_vector_type> 00052 static inline number_type get_int(const bit_vector_type& bv, 00053 typename bit_vector_type::size_type pos, 00054 uint16_t len) { 00055 return bv.get_int(pos, len); 00056 } 00057 00059 00065 template<class bit_vector_type> 00066 static void set_int(bit_vector_type& bv, typename bit_vector_type::size_type pos, 00067 number_type x, uint16_t len) { 00068 bv.set_int(pos, x, len); 00069 } 00070 00072 00075 static inline uint16_t popcount(number_type x) { 00076 return bit_magic::b1Cnt(x); 00077 } 00078 }; 00079 00081 template<> 00082 struct binomial_coefficients_trait<7> { 00083 typedef uint128_t number_type; 00084 static inline uint16_t l1BP(number_type x) { 00085 if ((x >> 64)) { 00086 return bit_magic::l1BP(x >> 64) + 64; 00087 } else { 00088 return bit_magic::l1BP(x); 00089 } 00090 } 00091 00092 template<class bit_vector_type> 00093 static inline number_type get_int(const bit_vector_type& bv, 00094 typename bit_vector_type::size_type pos, 00095 uint16_t len) { 00096 if (len <= 64) { 00097 return bv.get_int(pos, len); 00098 } else { 00099 return ((((number_type) bv.get_int(pos+64, len-64))<<64) + bv.get_int(pos, 64)); 00100 } 00101 } 00102 00103 template<class bit_vector_type> 00104 static void set_int(bit_vector_type& bv, 00105 typename bit_vector_type::size_type pos, 00106 number_type x, uint16_t len) { 00107 if (len <= 64) { 00108 bv.set_int(pos, x, len); 00109 } else { 00110 bv.set_int(pos, (uint64_t)x, 64); bv.set_int(pos+64, x>>64, len-64); 00111 } 00112 } 00113 00114 static inline uint16_t popcount(number_type x) { 00115 return bit_magic::b1Cnt(x >> 64) + bit_magic::b1Cnt(x); 00116 } 00117 }; 00118 00120 template<> 00121 struct binomial_coefficients_trait<8> { 00122 typedef uint256_t number_type; 00123 static inline uint16_t l1BP(number_type x) { 00124 return x.l1BP(); 00125 } 00126 00127 template<class bit_vector_type> 00128 static inline number_type get_int(const bit_vector_type& bv, 00129 typename bit_vector_type::size_type pos, 00130 uint16_t len) { 00131 if (len <= 64) { 00132 return number_type(bv.get_int(pos, len)); 00133 } else if (len <= 128) { 00134 return number_type(bv.get_int(pos, 64), bv.get_int(pos+64, len-64)); 00135 } else if (len <= 192) { 00136 return number_type(bv.get_int(pos, 64), bv.get_int(pos + 64, 64), 00137 (uint128_t)bv.get_int(pos + 128, len-128)); 00138 } else { // > 192 00139 return number_type(bv.get_int(pos, 64), bv.get_int(pos+64, 64), 00140 (((uint128_t)bv.get_int(pos+192, len-192))<<64) | bv.get_int(pos+128, 64)); 00141 } 00142 } 00143 00144 template<class bit_vector_type> 00145 static void set_int(bit_vector_type& bv, 00146 typename bit_vector_type::size_type pos, 00147 number_type x, 00148 uint16_t len) { 00149 if (len <= 64) { 00150 bv.set_int(pos, x, len); 00151 } else if (len <= 128) { 00152 bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, len-64); 00153 } else if (len <= 192) { 00154 bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, 64); 00155 bv.set_int(pos+128, x>>128, len-128); 00156 } else { // > 192 00157 bv.set_int(pos, x, 64); bv.set_int(pos+64, x>>64, 64); 00158 bv.set_int(pos+128, x>>128, 64); bv.set_int(pos+192, x>>192, len-192); 00159 } 00160 } 00161 00162 static inline uint16_t popcount(number_type x) { 00163 return x.popcount(); 00164 } 00165 }; 00166 00168 00174 template<uint16_t n> 00175 class binomial_coefficients 00176 { 00177 public: 00178 static class impl 00179 { 00180 public: 00181 enum {MAX_LOG = (n>128 ? 8 : (n > 64 ? 7 : 6))}; 00182 typedef binomial_coefficients_trait<MAX_LOG> trait; 00183 typedef typename trait::number_type number_type; 00184 static const uint16_t MAX_SIZE = (1 << MAX_LOG); 00185 number_type table[MAX_SIZE+1][MAX_SIZE+1]; // table for the binomial coefficients 00186 uint16_t space[MAX_SIZE+1][MAX_SIZE+1]; // for entry i,j \lceil \log( {i \choose j}+1 ) \rceil 00187 static const uint16_t BINARY_SEARCH_THRESHOLD = n/MAX_LOG; 00188 number_type L1Mask[MAX_SIZE+1]; // L1Mask[i] contains a word with the i least significant bits set to 1. 00189 // i.e. L1Mask[0] = 0, L1Mask[1] = 1,... 00190 number_type O1Mask[MAX_SIZE]; // O1Mask[i] contains a word with 00191 impl() { 00192 for (uint16_t k=0; k <= MAX_SIZE; ++k) { 00193 table[k][k] = 1; // initialize diagonal 00194 space[k][k] = 0; 00195 } 00196 for (uint16_t k=0; k <= MAX_SIZE; ++k) { 00197 table[0][k] = 0; // initialize first row 00198 space[0][k] = 0; 00199 } 00200 for (uint16_t nn=0; nn <= MAX_SIZE; ++nn) { 00201 table[nn][0] = 1; // initialize first column 00202 space[nn][0] = 0; 00203 } 00204 for (int nn=1; nn<=MAX_SIZE; ++nn) { 00205 for (int k=1; k<=MAX_SIZE; ++k) { 00206 table[nn][k] = table[nn-1][k-1] + table[nn-1][k]; 00207 space[nn][k] = (table[nn][k] == (number_type)1) ? 0 : trait::l1BP(table[nn][k]) + 1; 00208 } 00209 } 00210 L1Mask[0] = 0; 00211 number_type mask = 1; 00212 O1Mask[0] = 1; 00213 for (int i=1; i<=MAX_SIZE; ++i) { 00214 L1Mask[i] = mask; 00215 if (i < MAX_SIZE) 00216 O1Mask[i] = O1Mask[i-1]<<1; 00217 mask = (mask << 1); 00218 mask |= (number_type)1; 00219 } 00220 } 00221 } data; 00222 }; 00223 00224 template<uint16_t n> 00225 typename binomial_coefficients<n>::impl binomial_coefficients<n>::data; 00226 00228 00239 template<uint16_t n> 00240 class rrr_helper 00241 { 00242 public: 00243 typedef binomial_coefficients<n> binomial; 00244 typedef typename binomial::impl::number_type number_type; 00245 typedef typename binomial::impl::trait trait; 00246 00248 static inline uint16_t space_for_bt(uint16_t i) { 00249 return binomial::data.space[n][i]; 00250 } 00251 00252 template<class bit_vector_type> 00253 static inline number_type decode_btnr(const bit_vector_type& bv, 00254 typename bit_vector_type::size_type btnrp, uint16_t btnrlen) { 00255 return trait::get_int(bv, btnrp, btnrlen); 00256 } 00257 00258 template<class bit_vector_type> 00259 static void set_bt(bit_vector_type& bv, typename bit_vector_type::size_type pos, 00260 number_type bt, uint16_t space_for_bt) { 00261 trait::set_int(bv, pos, bt, space_for_bt); 00262 } 00263 00264 template<class bit_vector_type> 00265 static inline uint16_t get_bt(const bit_vector_type& bv, typename bit_vector_type::size_type pos, 00266 uint16_t block_size) { 00267 return trait::popcount(trait::get_int(bv, pos, block_size)); 00268 } 00269 00270 static inline number_type bin_to_nr(number_type bin) { 00271 if (bin == (number_type)0 or bin == binomial::data.L1Mask[n]) { // handle special case 00272 return 0; 00273 } 00274 number_type nr = 0; 00275 uint16_t k = trait::popcount(bin); 00276 uint16_t nn = n; // size of the block 00277 while (bin != (number_type)0) { 00278 if (1ULL & bin) { 00279 nr += binomial::data.table[nn-1][k]; 00280 --k; // go to the case (n-1, k-1) 00281 }// else go to the case (n-1, k) 00282 bin = (bin >> 1); 00283 --nn; 00284 } 00285 return nr; 00286 } 00287 00289 static inline bool decode_bit(uint16_t k, number_type nr, uint16_t off) { 00290 if (k == n) { // if n==k, then the encoded block consists only of ones 00291 return 1; 00292 } else if (k == 0) { // if k==0 then the encoded block consists only of zeros 00293 return 0; 00294 } else if (k == 1) { // if k==1 then the encoded block contains exactly on set bit at 00295 return (n-nr-1) == off; // position n-nr-1 00296 } 00297 uint16_t nn = n; 00298 // if k < n \log n, it is better to do a binary search for each of the on bits 00299 if (k < binomial::data.BINARY_SEARCH_THRESHOLD) { 00300 while (k > 1) { 00301 uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= binomial::data.table[nn_lb-1][k] 00302 while (nn_lb < nn_rb) { 00303 uint16_t nn_mid = (nn_lb + nn_rb) / 2; 00304 if (nr >= binomial::data.table[nn_mid-1][k]) { 00305 nn_lb = nn_mid+1; 00306 } else { 00307 nn_rb = nn_mid; 00308 } 00309 } 00310 nn = nn_lb-1; 00311 if (n-nn >= off) { 00312 return (n-nn) == off; 00313 } 00314 nr -= binomial::data.table[nn-1][k]; 00315 --k; 00316 --nn; 00317 } 00318 } else { // else do a linear decoding 00319 int i = 0; 00320 while (k > 1) { 00321 if (i > off) { 00322 return 0; 00323 } 00324 if (nr >= binomial::data.table[nn-1][k]) { 00325 nr -= binomial::data.table[nn-1][k]; 00326 --k; 00327 if (i == off) 00328 return 1; 00329 } 00330 --nn; 00331 ++i; 00332 } 00333 } 00334 return (n-nr-1) == off; 00335 } 00336 00338 static inline uint16_t decode_popcount(uint16_t k, number_type nr, uint16_t off) { 00339 if (k == n) { // if n==k, then the encoded block consists only of ones 00340 return off; // i.e. the answer is off 00341 } else if (k == 0) { // if k==0, then the encoded block consists only on zeros 00342 return 0; // i.e. the result is zero 00343 } else if (k == 1) { // if k==1 then the encoded block contains exactly on set bit at 00344 return (n-nr-1) < off; // position n-nr-1, and popcount is 1 if off > (n-nr-1). 00345 } 00346 uint16_t result = 0; 00347 uint16_t nn = n; 00348 // if k < n \log n, it is better to do a binary search for each of the on bits 00349 if (k < binomial::data.BINARY_SEARCH_THRESHOLD) { 00350 while (k > 1) { 00351 uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= binomial::data.table[nn_lb-1][k] 00352 while (nn_lb < nn_rb) { 00353 uint16_t nn_mid = (nn_lb + nn_rb) / 2; 00354 if (nr >= binomial::data.table[nn_mid-1][k]) { 00355 nn_lb = nn_mid+1; 00356 } else { 00357 nn_rb = nn_mid; 00358 } 00359 } 00360 nn = nn_lb-1; 00361 if (n-nn >= off) { 00362 return result; 00363 } 00364 ++result; 00365 nr -= binomial::data.table[nn-1][k]; 00366 --k; 00367 --nn; 00368 } 00369 } else { 00370 int i = 0; 00371 while (k > 1) { 00372 if (i >= off) { 00373 return result; 00374 } 00375 if (nr >= binomial::data.table[nn-1][k]) { 00376 nr -= binomial::data.table[nn-1][k]; 00377 --k; 00378 ++result; 00379 } 00380 --nn; 00381 ++i; 00382 } 00383 } 00384 return result + ((n-nr-1) < off); 00385 } 00386 00389 static inline uint16_t decode_select(uint16_t k, number_type& nr, uint16_t sel) { 00390 if (k == n) { // if n==k, then the encoded block consists only of ones 00391 return sel-1; 00392 } else if (k == 1 and sel == 1) { 00393 return n-nr-1; 00394 } 00395 uint16_t nn = n; 00396 // if k < n \log n, it is better to do a binary search for each of the on bits 00397 if (sel < binomial::data.BINARY_SEARCH_THRESHOLD) { 00398 while (sel > 0) { 00399 uint16_t nn_lb = k, nn_rb = nn+1; // invariant nr >= iii.m_coefficients[nn_lb-1] 00400 while (nn_lb < nn_rb) { 00401 uint16_t nn_mid = (nn_lb + nn_rb) / 2; 00402 if (nr >= binomial::data.table[nn_mid-1][k]) { 00403 nn_lb = nn_mid+1; 00404 } else { 00405 nn_rb = nn_mid; 00406 } 00407 } 00408 nn = nn_lb-1; 00409 nr -= binomial::data.table[nn-1][k]; 00410 --sel; 00411 --nn; 00412 --k; 00413 } 00414 return n-nn-1; 00415 } else { 00416 int i = 0; 00417 while (sel > 0) { // TODO: this condition only work if the precondition holds 00418 if (nr >= binomial::data.table[nn-1][k]) { 00419 nr -= binomial::data.table[nn-1][k]; 00420 --sel; 00421 --k; 00422 } 00423 --nn; 00424 ++i; 00425 } 00426 return i-1; 00427 } 00428 } 00429 00432 template<uint8_t pattern, uint8_t len> 00433 static inline uint16_t decode_select_bitpattern(uint16_t k, number_type& nr, uint16_t sel) { 00434 int i = 0; 00435 uint8_t decoded_pattern = 0; 00436 uint8_t decoded_len = 0; 00437 uint16_t nn = n; 00438 while (sel > 0) { // TODO: this condition only work if the precondition holds 00439 decoded_pattern = decoded_pattern<<1; 00440 ++decoded_len; 00441 if (nr >= binomial::data.table[nn-1][k]) { 00442 nr -= binomial::data.table[nn-1][k]; 00443 // a one is decoded 00444 decoded_pattern |= 1; // add to the pattern 00445 --k; 00446 } 00447 --nn; 00448 ++i; 00449 if (decoded_len == len) { // if decoded pattern length equals len of the searched pattern 00450 if (decoded_pattern == pattern) { // and pattern equals the searched pattern 00451 --sel; 00452 } 00453 decoded_pattern = 0; decoded_len = 0; // reset pattern 00454 } 00455 } 00456 return i-len; // return the starting position of $sel$th occurence of the pattern 00457 } 00458 00459 }; 00460 00461 } // end namespace 00462 #endif