SDSL: Succinct Data Structure Library: sdsl/include/sdsl/elias_delta

Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef SDSL_ELIAS_DELTA_CODER
00022 #define SDSL_ELIAS_DELTA_CODER
00023 
00024 #include "int_vector.hpp"
00025 
00026 namespace sdsl
00027 {
00028 
00029 namespace coder
00030 {
00031 
00033 class elias_delta
00034 {
00035     public:
00036         typedef uint64_t size_type;
00037 
00039 
00043         static const uint32_t EliasDeltaPrefixSum[1<<16];
00044 
00045         static const uint16_t EliasDeltaPrefixSum8bit[(1<<8)*8];
00046         static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
00047         static uint8_t encoding_length(uint64_t);
00049         /* \param data Bitstring
00050            \param start_idx Starting index of the decoding.
00051            \param n Number of values to decode from the bitstring.
00052            \param it Iterator to decode the values.
00053          */
00054         template<bool sumup, bool increment,class Iterator>
00055         static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL);
00056 
00058 
00062         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n);
00063         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n);
00064 
00065         template<class int_vector>
00066         static bool encode(const int_vector& v, int_vector& z);
00067         template<class int_vector>
00068         static bool decode(const int_vector& z, int_vector& v);
00069 
00071         /* \param x Positive integer to encode.
00072            \param z Raw data of vector to write the encoded form of x.
00073            \param start_idx Beginning bit index to write the encoded form ox x in z.
00074         */
00075         static void encode(uint64_t x, uint64_t*& z, uint8_t& offset);
00076 
00077         template<class int_vector>
00078         static uint64_t* raw_data(int_vector& v) {
00079             return v.m_data;
00080         }
00081 };
00082 
00083 // \sa coder::elias_delta::encoding_length
00084 inline uint8_t elias_delta::encoding_length(uint64_t w)
00085 {
00086     uint8_t len_1 = w ? bit_magic::l1BP(w) : 64;
00087     return len_1 + (bit_magic::l1BP(len_1+1)<<1) + 1;
00088 }
00089 
00090 template<class int_vector>
00091 bool elias_delta::encode(const int_vector& v, int_vector& z)
00092 {
00093     typedef typename int_vector::size_type size_type;
00094     z.set_int_width(v.get_int_width());
00095     size_type z_bit_size = 0;
00096     uint64_t w;
00097     const uint64_t zero_val = v.get_int_width() < 64 ? (1ULL)<<v.get_int_width() : 0;
00098     for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) {
00099         if ((w=*it) == 0) {
00100             w = zero_val;
00101         }
00102         z_bit_size += encoding_length(w);
00103     }
00104     z.bit_resize(z_bit_size);   // Initial size of z
00105     if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0
00106         *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word
00107     }
00108     z_bit_size = 0;
00109     uint64_t* z_data = z.m_data;
00110     uint8_t offset=0;
00111     size_type len, len_1_len; // TODO: change to uint8_t and test it
00112     for (typename int_vector::const_iterator it = v.begin(), end=v.end(); it != end; ++it) {
00113         w = *it;
00114         if (w == 0) {
00115             w = zero_val;
00116         }
00117         // (number of bits to represent w)
00118         len             = w ? bit_magic::l1BP(w)+1 : 65;
00119         // (number of bits to represent the length of w) -1
00120         len_1_len       = bit_magic::l1BP(len);
00121         // Write unary representation for the length of the length of w
00122         bit_magic::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len+1);
00123         if (len_1_len) {
00124             bit_magic::write_int_and_move(z_data, len, offset, len_1_len);
00125             bit_magic::write_int_and_move(z_data, w, offset, len-1);
00126         }
00127     }
00128     return true;
00129 }
00130 
00131 inline void elias_delta::encode(uint64_t x, uint64_t*& z, uint8_t& offset)
00132 {
00133 //    if (x == 0) {
00134 //        throw std::logic_error("elias_delta::encode(uint64_t x, uint64_t* &z, uint8_t &offset); x equals 0 that cannot be encoded!");
00135 //    }
00136     uint8_t len, len_1_len;
00137     // (number of bits to represent w)
00138     len = x ? bit_magic::l1BP(x)+1 : 65;
00139     // (number of bits to represent the length of w) - 1
00140     len_1_len   = bit_magic::l1BP(len);
00141     // Write unary representation for the length of the length of w
00142     bit_magic::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len+1);
00143     if (len_1_len) {
00144         bit_magic::write_int_and_move(z, len, offset, len_1_len);
00145         bit_magic::write_int_and_move(z, x, offset, len-1);
00146     }
00147 }
00148 
00149 template<class int_vector>
00150 bool elias_delta::decode(const int_vector& z, int_vector& v)
00151 {
00152     typename int_vector::size_type len_1_len, len, n = 0;
00153     const uint64_t* z_data      = z.data();
00154     const uint64_t* z_end       = z.data()+(z.bit_size()>>6);
00155     uint8_t offset              = 0;
00156     while ((z_data < z_end) or (z_data==z_end and offset < (z.bit_size()&0x3F))) {
00157         len_1_len = bit_magic::readUnaryIntAndMove(z_data, offset);
00158         if (len_1_len) {
00159             len         = bit_magic::read_int_and_move(z_data, offset, len_1_len) + (1 << len_1_len);
00160             bit_magic::move_right(z_data, offset, len-1);
00161         }
00162         ++n;
00163     }
00164     v.set_int_width(z.get_int_width());
00165     v.resize(n);
00166     return decode<false, true>(z.data(), 0, n, v.begin());
00167 }
00168 
00169 template<bool sumup, bool increment, class Iterator>
00170 inline uint64_t elias_delta::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it)
00171 {
00172     data += (start_idx >> 6);
00173     uint64_t value = 0;
00174     size_type i = 0;
00175     size_type len_1_len, len;
00176     uint8_t offset = start_idx & 0x3F;
00177     while (i++ < n) {// while not all values are decoded
00178         if (!sumup) value = 0;
00179         len_1_len = bit_magic::readUnaryIntAndMove(data, offset); // read length of length of x
00180         if (!len_1_len) {
00181             value += 1;
00182         } else {
00183             len         =  bit_magic::read_int_and_move(data, offset, len_1_len) + (1ULL << len_1_len);
00184             value       += bit_magic::read_int_and_move(data, offset, len-1) + (len-1<64) * (1ULL << (len-1));
00185         }
00186         if (increment) *(it++) = value;
00187     }
00188     return value;
00189 }
00190 
00191 } // end namespace coder
00192 } // end namespace sdsl
00193 
00194 #endif