SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/fibonacci_coder.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef SDSL_FIBONACCI_CODER
00022 #define SDSL_FIBONACCI_CODER
00023 
00024 #include "int_vector.hpp"
00025 
00026 namespace sdsl
00027 {
00028 
00029 namespace coder
00030 {
00031 
00033 class fibonacci
00034 {
00035     public:
00037         static const uint64_t Fib2bin_0_95[(1<<12)*8];
00038 
00040 
00044         static const uint8_t Fib2binShift[(1<<13)];
00045 
00047 
00050         static const uint16_t Fib2bin_0_16_greedy[1<<16];
00051 
00052         typedef uint64_t size_type;
00053 
00054         static const uint8_t min_codeword_length = 2; // 11 represents 1 and is the code word with minimum length
00056 
00058         static uint8_t encoding_length(uint64_t w);
00060         /* \param data Bitstring
00061            \param start_idx Starting index of the decoding.
00062            \param n Number of values to decode from the bitstring.
00063            \param it Iterator
00064          */
00065         template<bool sumup, bool increment, class Iterator>
00066         static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL);
00067 
00068         template<bool sumup, bool increment, class Iterator>
00069         static uint64_t decode1(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL);
00070 
00071 
00072 
00074 
00078         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n);
00079 
00081 
00083         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n);
00084 
00085         template<class int_vector1, class int_vector2>
00086         static bool encode(const int_vector1& v, int_vector2& z);
00087 
00088         template<class int_vector>
00089         static uint64_t* raw_data(int_vector& v) {
00090             return v.m_data;
00091         };
00092 
00094 
00098         static void encode(uint64_t x, uint64_t*& z, uint8_t& offset);
00099 
00100         template<class int_vector1, class int_vector2>
00101         static bool decode(const int_vector1& z, int_vector2& v);
00102 };
00103 
00104 inline uint8_t fibonacci::encoding_length(uint64_t w)
00105 {
00106     if (w == 0) {
00107         return 93;
00108     }
00109     // This limit for the leftmost 1bit in the resulting fib code could be improved using a table
00110     uint8_t len_1 = bit_magic::l1BP(w); // len-1 of the fib code
00111     while (++len_1 < (uint8_t)(sizeof(bit_magic::Fib)/sizeof(bit_magic::Fib[0])) && w >= bit_magic::Fib[len_1]);
00112     return len_1+1;
00113 }
00114 
00115 template<class int_vector1, class int_vector2>
00116 inline bool fibonacci::encode(const int_vector1& v, int_vector2& z)
00117 {
00118     uint64_t z_bit_size = 0;
00119     register uint64_t w;
00120     const uint64_t zero_val = v.get_int_width() < 64 ? (1ULL)<<v.get_int_width() : 0;
00121     for (typename int_vector1::const_iterator it=v.begin(), end = v.end(); it != end; ++it) {
00122         if ((w=*it) == 0) {
00123             if (v.get_int_width() < 64) {
00124                 w = zero_val;
00125             }
00126         }
00127         z_bit_size += encoding_length(w);
00128     }
00129     z.bit_resize(z_bit_size);
00130     if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0
00131         *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word
00132     }
00133     uint64_t* z_data    = z.m_data;
00134     uint8_t offset              = 0;
00135     register uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
00136     register uint64_t t;
00137     for (typename int_vector1::const_iterator it=v.begin(), end = v.end(); it != end; ++it) {
00138         w = *it;
00139         if (w == 0) {
00140             w = zero_val;
00141         }
00142         int8_t len_1 = encoding_length(w)-1,j;
00143         fibword_low = 0x0000000000000001ULL;
00144 
00145         if (len_1 >= 64) { // length > 65
00146             fibword_high = 0x0000000000000001ULL;
00147             j = len_1-1;
00148             if (w == 0) { // handle special case
00149                 fibword_high <<= 1;
00150                 fibword_high |= 1;
00151                 fibword_high <<= 1;
00152                 w -= bit_magic::Fib[len_1-1];
00153                 j -= 2;
00154             }
00155             for (; j>63; --j) {
00156                 fibword_high <<= 1;
00157                 if (w >= (t=bit_magic::Fib[j])) {
00158                     w -= t;
00159                     fibword_high |= 1;
00160                     if (w and j>64) {
00161                         fibword_high <<= 1;
00162                         --j;
00163                     } else {
00164                         fibword_high <<= (64-j);
00165                         break;
00166                     }
00167                 }
00168             }
00169             j           = 64;
00170         } else {
00171             j = len_1-1;
00172         }
00173 
00174         for (; j >= 0; --j) {
00175             fibword_low <<= 1;
00176             if (w >= (t=bit_magic::Fib[j])) {
00177                 w -= t;
00178                 fibword_low |= 1;
00179                 if (w) {
00180                     fibword_low <<= 1;
00181                     --j;
00182                 } else {
00183                     fibword_low <<= (j);
00184                     break;
00185                 }
00186             }
00187         }
00188         if (len_1 >=64) {
00189             bit_magic::write_int_and_move(z_data, fibword_low, offset, 64);
00190             bit_magic::write_int_and_move(z_data, fibword_high, offset, len_1 - 63);
00191         } else {
00192             bit_magic::write_int_and_move(z_data, fibword_low, offset, (len_1&0x3F) +1);
00193         }
00194     }
00195     z.set_int_width(v.get_int_width());
00196     return true;
00197 }
00198 
00199 inline void fibonacci::encode(uint64_t x, uint64_t*& z, uint8_t& offset)
00200 {
00201     register uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
00202     register uint64_t t;
00203     int8_t len_1 = encoding_length(x)-1,j;
00204     fibword_low = 0x0000000000000001ULL;
00205 
00206     if (len_1 >= 64) { // length > 65
00207         fibword_high = 0x0000000000000001ULL;
00208         j = len_1-1;
00209         if (x == 0) { // handle special case
00210             fibword_high <<= 1;
00211             fibword_high |= 1;
00212             fibword_high <<= 1;
00213             x -= bit_magic::Fib[len_1-1];
00214             j -= 2;
00215         }
00216         for (; j>63; --j) {
00217             fibword_high <<= 1;
00218             if (x >= (t=bit_magic::Fib[j])) {
00219                 x -= t;
00220                 fibword_high |= 1;
00221                 if (x and j>64) {
00222                     fibword_high <<= 1;
00223                     --j;
00224                 } else {
00225                     fibword_high <<= (64-j);
00226                     break;
00227                 }
00228             }
00229         }
00230         j       = 64;
00231     } else {
00232         j = len_1-1;
00233     }
00234     for (; j >= 0; --j) {
00235         fibword_low <<= 1;
00236         if (x >= (t=bit_magic::Fib[j])) {
00237             x -= t;
00238             fibword_low |= 1;
00239             if (x) {
00240                 fibword_low <<= 1;
00241                 --j;
00242             } else {
00243                 fibword_low <<= (j);
00244                 break;
00245             }
00246         }
00247     }
00248     if (len_1 >=64) {
00249         bit_magic::write_int_and_move(z, fibword_low, offset, 64);
00250         bit_magic::write_int_and_move(z, fibword_high, offset, len_1 - 63);
00251     } else {
00252         bit_magic::write_int_and_move(z, fibword_low, offset, (len_1&0x3F) +1);
00253     }
00254 }
00255 
00256 template<class int_vector1, class int_vector2>
00257 bool fibonacci::decode(const int_vector1& z, int_vector2& v)
00258 {
00259     uint64_t n = 0, carry = 0; // n = number of values to be decoded
00260     const uint64_t* data = z.data();
00261     // Determine size of v
00262     if (z.empty()) {// if z is empty we are ready with decoding
00263         v.set_int_width(z.get_int_width());
00264         v.resize(0);
00265         return true;
00266     }
00267     for (typename int_vector1::size_type i=0; i < (z.capacity()>>6)-1; ++i, ++data) {
00268         n += bit_magic::b11Cnt(*data, carry);
00269     }
00270     if (z.capacity() != z.bit_size()) {
00271         n += bit_magic::b11Cnt((*data) & bit_magic::Li1Mask[z.bit_size()&0x3F], carry);
00272     } else {
00273         n += bit_magic::b11Cnt(*data, carry);
00274     }
00275     std::cout<<"n="<<n<<std::endl;
00276     v.set_int_width(z.get_int_width()); v.resize(n);
00277     return decode<false, true>(z.data(), 0, n, v.begin());
00278 }
00279 
00280 template<bool sumup, bool increment, class Iterator>
00281 inline uint64_t fibonacci::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it)
00282 {
00283     data += (start_idx >> 6);
00284     uint64_t w = 0, value = 0;
00285     int8_t buffered = 0; // bits buffered in w, in 0..64
00286     int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
00287     int8_t shift = 0;
00288     uint32_t fibtable = 0;
00289     while (n) {// while not all values are decoded
00290         while (buffered < 13 and bit_magic::b11Cnt(w) < n) {
00291             w |= (((*data)>>read)<<buffered);
00292             if (read >= buffered) {
00293                 ++data;
00294                 buffered += 64-read;
00295                 read = 0;
00296             } else { // read < buffered
00297                 read += 64-buffered;
00298                 buffered = 64;
00299             }
00300         }
00301         value += Fib2bin_0_95[(fibtable<<12) | (w&0xFFF)];
00302         shift  = Fib2binShift[w&0x1FFF];
00303         if (shift > 0) {// if end of decoding
00304             w >>= shift;
00305             buffered -= shift;
00306             if (increment) *(it++) = value;
00307             if (!sumup and n!=1) value = 0;
00308             fibtable = 0;
00309             --n;
00310         } else { // not end of decoding
00311             w >>= 12;
00312             buffered -= 12;
00313             ++fibtable;
00314         }
00315     }
00316     return value;
00317 }
00318 
00319 template<bool sumup, bool increment, class Iterator>
00320 inline uint64_t fibonacci::decode1(const uint64_t* data, const size_type start_idx, size_type n, Iterator it)
00321 {
00322     data += (start_idx >> 6);
00323     uint64_t w = 0, value = 0;
00324     int8_t buffered = 0; // bits buffered in w, in 0..64
00325     int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
00326     int8_t shift = 0;
00327     uint32_t fibtable = 0;
00328     uint8_t blocknr = (start_idx>>6)%9;
00329     while (n) {// while not all values are decoded
00330         while (buffered < 13 and bit_magic::b11Cnt(w) < n) {
00331             w |= (((*data)>>read)<<buffered);
00332             if (read >= buffered) {
00333                 ++blocknr;
00334                 ++data;
00335                 if (blocknr==8) {
00336                     ++data;
00337                     blocknr=0;
00338                 }
00339                 buffered += 64-read;
00340                 read = 0;
00341             } else { // read < buffered
00342                 read += 64-buffered;
00343                 buffered = 64;
00344             }
00345         }
00346         value += Fib2bin_0_95[(fibtable<<12) | (w&0xFFF)];
00347         shift  = Fib2binShift[w&0x1FFF];
00348         if (shift > 0) {// if end of decoding
00349             w >>= shift;
00350             buffered -= shift;
00351             if (increment) *(it++) = value;
00352             if (!sumup)
00353                 value = 0;
00354             fibtable = 0;
00355             --n;
00356         } else { // not end of decoding
00357             w >>= 12;
00358             buffered -= 12;
00359             ++fibtable;
00360         }
00361     }
00362     return value;
00363 }
00364 
00365 } // end namespace coder
00366 } // end namespace sdsl
00367 
00368 #endif