SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef SDSL_ELIAS_DELTA_CODER 00022 #define SDSL_ELIAS_DELTA_CODER 00023 00024 #include "int_vector.hpp" 00025 00026 namespace sdsl 00027 { 00028 00029 namespace coder 00030 { 00031 00033 class elias_delta 00034 { 00035 public: 00036 typedef uint64_t size_type; 00037 00039 00043 static const uint32_t EliasDeltaPrefixSum[1<<16]; 00044 00045 static const uint16_t EliasDeltaPrefixSum8bit[(1<<8)*8]; 00046 static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length 00047 static uint8_t encoding_length(uint64_t); 00049 /* \param data Bitstring 00050 \param start_idx Starting index of the decoding. 00051 \param n Number of values to decode from the bitstring. 00052 \param it Iterator to decode the values. 00053 */ 00054 template<bool sumup, bool increment,class Iterator> 00055 static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL); 00056 00058 00062 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n); 00063 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n); 00064 00065 template<class int_vector> 00066 static bool encode(const int_vector& v, int_vector& z); 00067 template<class int_vector> 00068 static bool decode(const int_vector& z, int_vector& v); 00069 00071 /* \param x Positive integer to encode. 00072 \param z Raw data of vector to write the encoded form of x. 00073 \param start_idx Beginning bit index to write the encoded form ox x in z. 00074 */ 00075 static void encode(uint64_t x, uint64_t*& z, uint8_t& offset); 00076 00077 template<class int_vector> 00078 static uint64_t* raw_data(int_vector& v) { 00079 return v.m_data; 00080 } 00081 }; 00082 00083 // \sa coder::elias_delta::encoding_length 00084 inline uint8_t elias_delta::encoding_length(uint64_t w) 00085 { 00086 uint8_t len_1 = w ? bit_magic::l1BP(w) : 64; 00087 return len_1 + (bit_magic::l1BP(len_1+1)<<1) + 1; 00088 } 00089 00090 template<class int_vector> 00091 bool elias_delta::encode(const int_vector& v, int_vector& z) 00092 { 00093 typedef typename int_vector::size_type size_type; 00094 z.set_int_width(v.get_int_width()); 00095 size_type z_bit_size = 0; 00096 uint64_t w; 00097 const uint64_t zero_val = v.get_int_width() < 64 ? (1ULL)<<v.get_int_width() : 0; 00098 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) { 00099 if ((w=*it) == 0) { 00100 w = zero_val; 00101 } 00102 z_bit_size += encoding_length(w); 00103 } 00104 z.bit_resize(z_bit_size); // Initial size of z 00105 if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0 00106 *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word 00107 } 00108 z_bit_size = 0; 00109 uint64_t* z_data = z.m_data; 00110 uint8_t offset=0; 00111 size_type len, len_1_len; // TODO: change to uint8_t and test it 00112 for (typename int_vector::const_iterator it = v.begin(), end=v.end(); it != end; ++it) { 00113 w = *it; 00114 if (w == 0) { 00115 w = zero_val; 00116 } 00117 // (number of bits to represent w) 00118 len = w ? bit_magic::l1BP(w)+1 : 65; 00119 // (number of bits to represent the length of w) -1 00120 len_1_len = bit_magic::l1BP(len); 00121 // Write unary representation for the length of the length of w 00122 bit_magic::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len+1); 00123 if (len_1_len) { 00124 bit_magic::write_int_and_move(z_data, len, offset, len_1_len); 00125 bit_magic::write_int_and_move(z_data, w, offset, len-1); 00126 } 00127 } 00128 return true; 00129 } 00130 00131 inline void elias_delta::encode(uint64_t x, uint64_t*& z, uint8_t& offset) 00132 { 00133 // if (x == 0) { 00134 // throw std::logic_error("elias_delta::encode(uint64_t x, uint64_t* &z, uint8_t &offset); x equals 0 that cannot be encoded!"); 00135 // } 00136 uint8_t len, len_1_len; 00137 // (number of bits to represent w) 00138 len = x ? bit_magic::l1BP(x)+1 : 65; 00139 // (number of bits to represent the length of w) - 1 00140 len_1_len = bit_magic::l1BP(len); 00141 // Write unary representation for the length of the length of w 00142 bit_magic::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len+1); 00143 if (len_1_len) { 00144 bit_magic::write_int_and_move(z, len, offset, len_1_len); 00145 bit_magic::write_int_and_move(z, x, offset, len-1); 00146 } 00147 } 00148 00149 template<class int_vector> 00150 bool elias_delta::decode(const int_vector& z, int_vector& v) 00151 { 00152 typename int_vector::size_type len_1_len, len, n = 0; 00153 const uint64_t* z_data = z.data(); 00154 const uint64_t* z_end = z.data()+(z.bit_size()>>6); 00155 uint8_t offset = 0; 00156 while ((z_data < z_end) or (z_data==z_end and offset < (z.bit_size()&0x3F))) { 00157 len_1_len = bit_magic::readUnaryIntAndMove(z_data, offset); 00158 if (len_1_len) { 00159 len = bit_magic::read_int_and_move(z_data, offset, len_1_len) + (1 << len_1_len); 00160 bit_magic::move_right(z_data, offset, len-1); 00161 } 00162 ++n; 00163 } 00164 v.set_int_width(z.get_int_width()); 00165 v.resize(n); 00166 return decode<false, true>(z.data(), 0, n, v.begin()); 00167 } 00168 00169 template<bool sumup, bool increment, class Iterator> 00170 inline uint64_t elias_delta::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it) 00171 { 00172 data += (start_idx >> 6); 00173 uint64_t value = 0; 00174 size_type i = 0; 00175 size_type len_1_len, len; 00176 uint8_t offset = start_idx & 0x3F; 00177 while (i++ < n) {// while not all values are decoded 00178 if (!sumup) value = 0; 00179 len_1_len = bit_magic::readUnaryIntAndMove(data, offset); // read length of length of x 00180 if (!len_1_len) { 00181 value += 1; 00182 } else { 00183 len = bit_magic::read_int_and_move(data, offset, len_1_len) + (1ULL << len_1_len); 00184 value += bit_magic::read_int_and_move(data, offset, len-1) + (len-1<64) * (1ULL << (len-1)); 00185 } 00186 if (increment) *(it++) = value; 00187 } 00188 return value; 00189 } 00190 00191 } // end namespace coder 00192 } // end namespace sdsl 00193 00194 #endif