SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef SDSL_FIBONACCI_CODER 00022 #define SDSL_FIBONACCI_CODER 00023 00024 #include "int_vector.hpp" 00025 00026 namespace sdsl 00027 { 00028 00029 namespace coder 00030 { 00031 00033 class fibonacci 00034 { 00035 public: 00037 static const uint64_t Fib2bin_0_95[(1<<12)*8]; 00038 00040 00044 static const uint8_t Fib2binShift[(1<<13)]; 00045 00047 00050 static const uint16_t Fib2bin_0_16_greedy[1<<16]; 00051 00052 typedef uint64_t size_type; 00053 00054 static const uint8_t min_codeword_length = 2; // 11 represents 1 and is the code word with minimum length 00056 00058 static uint8_t encoding_length(uint64_t w); 00060 /* \param data Bitstring 00061 \param start_idx Starting index of the decoding. 00062 \param n Number of values to decode from the bitstring. 00063 \param it Iterator 00064 */ 00065 template<bool sumup, bool increment, class Iterator> 00066 static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL); 00067 00068 template<bool sumup, bool increment, class Iterator> 00069 static uint64_t decode1(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL); 00070 00071 00072 00074 00078 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n); 00079 00081 00083 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n); 00084 00085 template<class int_vector1, class int_vector2> 00086 static bool encode(const int_vector1& v, int_vector2& z); 00087 00088 template<class int_vector> 00089 static uint64_t* raw_data(int_vector& v) { 00090 return v.m_data; 00091 }; 00092 00094 00098 static void encode(uint64_t x, uint64_t*& z, uint8_t& offset); 00099 00100 template<class int_vector1, class int_vector2> 00101 static bool decode(const int_vector1& z, int_vector2& v); 00102 }; 00103 00104 inline uint8_t fibonacci::encoding_length(uint64_t w) 00105 { 00106 if (w == 0) { 00107 return 93; 00108 } 00109 // This limit for the leftmost 1bit in the resulting fib code could be improved using a table 00110 uint8_t len_1 = bit_magic::l1BP(w); // len-1 of the fib code 00111 while (++len_1 < (uint8_t)(sizeof(bit_magic::Fib)/sizeof(bit_magic::Fib[0])) && w >= bit_magic::Fib[len_1]); 00112 return len_1+1; 00113 } 00114 00115 template<class int_vector1, class int_vector2> 00116 inline bool fibonacci::encode(const int_vector1& v, int_vector2& z) 00117 { 00118 uint64_t z_bit_size = 0; 00119 register uint64_t w; 00120 const uint64_t zero_val = v.get_int_width() < 64 ? (1ULL)<<v.get_int_width() : 0; 00121 for (typename int_vector1::const_iterator it=v.begin(), end = v.end(); it != end; ++it) { 00122 if ((w=*it) == 0) { 00123 if (v.get_int_width() < 64) { 00124 w = zero_val; 00125 } 00126 } 00127 z_bit_size += encoding_length(w); 00128 } 00129 z.bit_resize(z_bit_size); 00130 if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0 00131 *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word 00132 } 00133 uint64_t* z_data = z.m_data; 00134 uint8_t offset = 0; 00135 register uint64_t fibword_high = 0x0000000000000001ULL, fibword_low; 00136 register uint64_t t; 00137 for (typename int_vector1::const_iterator it=v.begin(), end = v.end(); it != end; ++it) { 00138 w = *it; 00139 if (w == 0) { 00140 w = zero_val; 00141 } 00142 int8_t len_1 = encoding_length(w)-1,j; 00143 fibword_low = 0x0000000000000001ULL; 00144 00145 if (len_1 >= 64) { // length > 65 00146 fibword_high = 0x0000000000000001ULL; 00147 j = len_1-1; 00148 if (w == 0) { // handle special case 00149 fibword_high <<= 1; 00150 fibword_high |= 1; 00151 fibword_high <<= 1; 00152 w -= bit_magic::Fib[len_1-1]; 00153 j -= 2; 00154 } 00155 for (; j>63; --j) { 00156 fibword_high <<= 1; 00157 if (w >= (t=bit_magic::Fib[j])) { 00158 w -= t; 00159 fibword_high |= 1; 00160 if (w and j>64) { 00161 fibword_high <<= 1; 00162 --j; 00163 } else { 00164 fibword_high <<= (64-j); 00165 break; 00166 } 00167 } 00168 } 00169 j = 64; 00170 } else { 00171 j = len_1-1; 00172 } 00173 00174 for (; j >= 0; --j) { 00175 fibword_low <<= 1; 00176 if (w >= (t=bit_magic::Fib[j])) { 00177 w -= t; 00178 fibword_low |= 1; 00179 if (w) { 00180 fibword_low <<= 1; 00181 --j; 00182 } else { 00183 fibword_low <<= (j); 00184 break; 00185 } 00186 } 00187 } 00188 if (len_1 >=64) { 00189 bit_magic::write_int_and_move(z_data, fibword_low, offset, 64); 00190 bit_magic::write_int_and_move(z_data, fibword_high, offset, len_1 - 63); 00191 } else { 00192 bit_magic::write_int_and_move(z_data, fibword_low, offset, (len_1&0x3F) +1); 00193 } 00194 } 00195 z.set_int_width(v.get_int_width()); 00196 return true; 00197 } 00198 00199 inline void fibonacci::encode(uint64_t x, uint64_t*& z, uint8_t& offset) 00200 { 00201 register uint64_t fibword_high = 0x0000000000000001ULL, fibword_low; 00202 register uint64_t t; 00203 int8_t len_1 = encoding_length(x)-1,j; 00204 fibword_low = 0x0000000000000001ULL; 00205 00206 if (len_1 >= 64) { // length > 65 00207 fibword_high = 0x0000000000000001ULL; 00208 j = len_1-1; 00209 if (x == 0) { // handle special case 00210 fibword_high <<= 1; 00211 fibword_high |= 1; 00212 fibword_high <<= 1; 00213 x -= bit_magic::Fib[len_1-1]; 00214 j -= 2; 00215 } 00216 for (; j>63; --j) { 00217 fibword_high <<= 1; 00218 if (x >= (t=bit_magic::Fib[j])) { 00219 x -= t; 00220 fibword_high |= 1; 00221 if (x and j>64) { 00222 fibword_high <<= 1; 00223 --j; 00224 } else { 00225 fibword_high <<= (64-j); 00226 break; 00227 } 00228 } 00229 } 00230 j = 64; 00231 } else { 00232 j = len_1-1; 00233 } 00234 for (; j >= 0; --j) { 00235 fibword_low <<= 1; 00236 if (x >= (t=bit_magic::Fib[j])) { 00237 x -= t; 00238 fibword_low |= 1; 00239 if (x) { 00240 fibword_low <<= 1; 00241 --j; 00242 } else { 00243 fibword_low <<= (j); 00244 break; 00245 } 00246 } 00247 } 00248 if (len_1 >=64) { 00249 bit_magic::write_int_and_move(z, fibword_low, offset, 64); 00250 bit_magic::write_int_and_move(z, fibword_high, offset, len_1 - 63); 00251 } else { 00252 bit_magic::write_int_and_move(z, fibword_low, offset, (len_1&0x3F) +1); 00253 } 00254 } 00255 00256 template<class int_vector1, class int_vector2> 00257 bool fibonacci::decode(const int_vector1& z, int_vector2& v) 00258 { 00259 uint64_t n = 0, carry = 0; // n = number of values to be decoded 00260 const uint64_t* data = z.data(); 00261 // Determine size of v 00262 if (z.empty()) {// if z is empty we are ready with decoding 00263 v.set_int_width(z.get_int_width()); 00264 v.resize(0); 00265 return true; 00266 } 00267 for (typename int_vector1::size_type i=0; i < (z.capacity()>>6)-1; ++i, ++data) { 00268 n += bit_magic::b11Cnt(*data, carry); 00269 } 00270 if (z.capacity() != z.bit_size()) { 00271 n += bit_magic::b11Cnt((*data) & bit_magic::Li1Mask[z.bit_size()&0x3F], carry); 00272 } else { 00273 n += bit_magic::b11Cnt(*data, carry); 00274 } 00275 std::cout<<"n="<<n<<std::endl; 00276 v.set_int_width(z.get_int_width()); v.resize(n); 00277 return decode<false, true>(z.data(), 0, n, v.begin()); 00278 } 00279 00280 template<bool sumup, bool increment, class Iterator> 00281 inline uint64_t fibonacci::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it) 00282 { 00283 data += (start_idx >> 6); 00284 uint64_t w = 0, value = 0; 00285 int8_t buffered = 0; // bits buffered in w, in 0..64 00286 int8_t read = start_idx & 0x3F; // read bits in current *data 0..63 00287 int8_t shift = 0; 00288 uint32_t fibtable = 0; 00289 while (n) {// while not all values are decoded 00290 while (buffered < 13 and bit_magic::b11Cnt(w) < n) { 00291 w |= (((*data)>>read)<<buffered); 00292 if (read >= buffered) { 00293 ++data; 00294 buffered += 64-read; 00295 read = 0; 00296 } else { // read < buffered 00297 read += 64-buffered; 00298 buffered = 64; 00299 } 00300 } 00301 value += Fib2bin_0_95[(fibtable<<12) | (w&0xFFF)]; 00302 shift = Fib2binShift[w&0x1FFF]; 00303 if (shift > 0) {// if end of decoding 00304 w >>= shift; 00305 buffered -= shift; 00306 if (increment) *(it++) = value; 00307 if (!sumup and n!=1) value = 0; 00308 fibtable = 0; 00309 --n; 00310 } else { // not end of decoding 00311 w >>= 12; 00312 buffered -= 12; 00313 ++fibtable; 00314 } 00315 } 00316 return value; 00317 } 00318 00319 template<bool sumup, bool increment, class Iterator> 00320 inline uint64_t fibonacci::decode1(const uint64_t* data, const size_type start_idx, size_type n, Iterator it) 00321 { 00322 data += (start_idx >> 6); 00323 uint64_t w = 0, value = 0; 00324 int8_t buffered = 0; // bits buffered in w, in 0..64 00325 int8_t read = start_idx & 0x3F; // read bits in current *data 0..63 00326 int8_t shift = 0; 00327 uint32_t fibtable = 0; 00328 uint8_t blocknr = (start_idx>>6)%9; 00329 while (n) {// while not all values are decoded 00330 while (buffered < 13 and bit_magic::b11Cnt(w) < n) { 00331 w |= (((*data)>>read)<<buffered); 00332 if (read >= buffered) { 00333 ++blocknr; 00334 ++data; 00335 if (blocknr==8) { 00336 ++data; 00337 blocknr=0; 00338 } 00339 buffered += 64-read; 00340 read = 0; 00341 } else { // read < buffered 00342 read += 64-buffered; 00343 buffered = 64; 00344 } 00345 } 00346 value += Fib2bin_0_95[(fibtable<<12) | (w&0xFFF)]; 00347 shift = Fib2binShift[w&0x1FFF]; 00348 if (shift > 0) {// if end of decoding 00349 w >>= shift; 00350 buffered -= shift; 00351 if (increment) *(it++) = value; 00352 if (!sumup) 00353 value = 0; 00354 fibtable = 0; 00355 --n; 00356 } else { // not end of decoding 00357 w >>= 12; 00358 buffered -= 12; 00359 ++fibtable; 00360 } 00361 } 00362 return value; 00363 } 00364 00365 } // end namespace coder 00366 } // end namespace sdsl 00367 00368 #endif