SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef SDSL_TERNARY_CODER 00022 #define SDSL_TERNARY_CODER 00023 00024 #include "int_vector.hpp" 00025 00026 namespace sdsl 00027 { 00028 00029 namespace coder 00030 { 00031 00033 class ternary 00034 { 00035 public: 00036 typedef uint64_t size_type; 00037 00039 00045 static const uint16_t Trny2bin_0_16[1<<16]; 00046 00047 static const uint16_t Trny2bin_0_16_greedy[1<<16]; 00048 00049 00050 00051 static const uint8_t min_codeword_length = 4; // 11 01 represents 1 and is the code word with minumum length 00052 00053 static uint8_t encoding_length(uint64_t w); 00054 00055 template<bool sumup, bool increment, class Iterator> 00056 static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL); 00057 00058 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n); 00059 static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n); 00060 00061 template<class int_vector> 00062 static bool encode(const int_vector& v, int_vector& z); 00063 template<class int_vector> 00064 static bool decode(const int_vector& z, int_vector& v); 00065 00067 /* \param x Positive integer to encode. 00068 \param z Raw data of vector to write the encoded form of x. 00069 \param start_idx Beginning bit index to write the encoded form ox x in z. 00070 */ 00071 static void encode(uint64_t x, uint64_t*& z, uint8_t& offset); 00072 00073 template<class int_vector> 00074 static uint64_t* raw_data(int_vector& v) { 00075 return v.m_data; 00076 }; 00077 }; 00078 00079 inline uint8_t ternary::encoding_length(uint64_t w) 00080 { 00081 if (w==0) return 2; 00082 uint8_t res = 2; // add two for the terminating "11" bit pair 00083 do { 00084 w/=3; 00085 res+=2; 00086 } while (w!=0); 00087 return res; 00088 } 00089 00090 template<class int_vector> 00091 bool ternary::encode(const int_vector& v, int_vector& z) 00092 { 00093 z.set_int_width(v.get_int_width()); 00094 typename int_vector::size_type z_bit_size = 0; 00095 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) { 00096 z_bit_size += encoding_length(*it); 00097 } 00098 z.bit_resize(z_bit_size); // Initial size of z 00099 if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0 00100 *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word 00101 } 00102 z_bit_size = 0; 00103 uint64_t* z_data = z.m_data, *z_temp; 00104 uint8_t offset=0, off_temp; 00105 uint64_t w=0; 00106 typename int_vector::size_type len=0; 00107 for (typename int_vector::const_iterator it = v.begin(), end=v.end(); it != end; ++it) { 00108 w = *it; 00109 if (w == 0) { 00110 throw std::logic_error("ternary::encode(const int_vector &v, int_vector &z); entry of v equals 0 that cannot be encoded!"); 00111 } 00112 // (length of bits to represent w) 00113 len = encoding_length(*it)-2; 00114 if ((offset=(offset + len)) > 63) { 00115 z_data += (offset>>6); 00116 } 00117 offset &= 0x3F; 00118 z_temp = z_data; 00119 off_temp = offset; 00120 if ((offset=(offset+2)) > 63) { 00121 ++z_data; 00122 } 00123 offset &= 0x3F; 00124 // bool not_zero = false; 00125 // write ternary number 00126 bit_magic::write_int(z_temp, 3, off_temp, 2); 00127 for (int32_t i=len; i>0; i-=2) { 00128 if (off_temp != 0) 00129 off_temp -= 2; 00130 else { 00131 off_temp = 62; 00132 --z_temp; 00133 } 00134 // if( (w%3)>0 ) 00135 // not_zero = true; 00136 bit_magic::write_int(z_temp, w%3, off_temp, 2); 00137 w/=3; 00138 } 00139 // assert(not_zero); 00140 } 00141 return true; 00142 } 00143 00144 template<class int_vector> 00145 bool ternary::decode(const int_vector& z, int_vector& v) 00146 { 00147 uint64_t n = 0; // n = number of values to be decoded 00148 const uint64_t* data = z.data(); 00149 // Determine size of v 00150 if (z.empty()) {// if z is empty we are ready with decoding 00151 v.set_int_width(z.get_int_width()); 00152 v.resize(0); 00153 return true; 00154 } 00155 for (typename int_vector::size_type i=0; i < (z.capacity()>>6)-1; ++i, ++data) { 00156 n += bit_magic::eB11Cnt(*data); 00157 } 00158 if (z.capacity() != z.bit_size()) { 00159 n += bit_magic::eB11Cnt((*data) & bit_magic::Li1Mask[z.bit_size()&0x3F]); 00160 } else { 00161 n += bit_magic::eB11Cnt(*data); 00162 } 00163 v.set_int_width(z.get_int_width()); v.resize(n); 00164 return decode<false, true>(z.data(), 0, n, v.begin()); 00165 } 00166 00167 template<bool sumup, bool increment, class Iterator> 00168 inline uint64_t ternary::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it) 00169 { 00170 data += (start_idx >> 6); 00171 size_type i = 0; 00172 uint64_t w = 0, value = 0, tempv=0; 00173 uint32_t v; 00174 int8_t buffered = 0; // bits buffered in w, in 0..64 00175 int8_t read = start_idx & 0x3F; // read bits in current *data 0..63 00176 int8_t shift = 0; 00177 while (i < n) {// while not all values are decoded 00178 while (buffered < 16 and i + bit_magic::eB11Cnt(w) < n) { 00179 w |= (((*data)>>read)<<buffered); 00180 if (read >= buffered) { 00181 ++data; 00182 buffered += 64-read; 00183 read = 0; 00184 } else { // read < buffered 00185 read += 64-buffered; 00186 buffered = 64; 00187 } 00188 } 00189 v = Trny2bin_0_16[w&0xFFFF]; 00190 //if shift is 16 and the most significant bit pair is not 11 00191 if (((v>>13)==7) && (w&0xC000)!=0xC000) {// not end of decoding 00192 tempv *= 6561; // tempv *= 3**8 00193 tempv += v&0x1FFF; 00194 w >>= 16; 00195 buffered -= 16; 00196 } else { // end of decoding 00197 shift = ((v>>13)+1)<<1; 00198 w >>= shift; 00199 buffered -= shift; 00200 tempv *= bit_magic::powerOf3[v>>13]; 00201 tempv += v&0xFFF; 00202 if (sumup) 00203 value += tempv; 00204 else 00205 value = tempv; 00206 if (increment) *(it++) = value; 00207 tempv = 0; 00208 ++i; 00209 } 00210 } 00211 return value; 00212 } 00213 00214 } // end namespace coder 00215 } // end namespace sdsl 00216 00217 #endif