SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/ternary_coder.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef SDSL_TERNARY_CODER
00022 #define SDSL_TERNARY_CODER
00023 
00024 #include "int_vector.hpp"
00025 
00026 namespace sdsl
00027 {
00028 
00029 namespace coder
00030 {
00031 
00033 class ternary
00034 {
00035     public:
00036         typedef uint64_t size_type;
00037 
00039 
00045         static const uint16_t Trny2bin_0_16[1<<16];
00046 
00047         static const uint16_t Trny2bin_0_16_greedy[1<<16];
00048 
00049 
00050 
00051         static const uint8_t min_codeword_length = 4; // 11 01 represents 1 and is the code word with minumum length
00052 
00053         static uint8_t encoding_length(uint64_t w);
00054 
00055         template<bool sumup, bool increment, class Iterator>
00056         static uint64_t decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it=(Iterator)NULL);
00057 
00058         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, size_type n);
00059         static uint64_t decode_prefix_sum(const uint64_t* data, const size_type start_idx, const size_type end_idx, size_type n);
00060 
00061         template<class int_vector>
00062         static bool encode(const int_vector& v, int_vector& z);
00063         template<class int_vector>
00064         static bool decode(const int_vector& z, int_vector& v);
00065 
00067         /* \param x Positive integer to encode.
00068            \param z Raw data of vector to write the encoded form of x.
00069            \param start_idx Beginning bit index to write the encoded form ox x in z.
00070         */
00071         static void encode(uint64_t x, uint64_t*& z, uint8_t& offset);
00072 
00073         template<class int_vector>
00074         static uint64_t* raw_data(int_vector& v) {
00075             return v.m_data;
00076         };
00077 };
00078 
00079 inline uint8_t ternary::encoding_length(uint64_t w)
00080 {
00081     if (w==0) return 2;
00082     uint8_t res = 2;   // add two for the terminating "11" bit pair
00083     do {
00084         w/=3;
00085         res+=2;
00086     } while (w!=0);
00087     return res;
00088 }
00089 
00090 template<class int_vector>
00091 bool ternary::encode(const int_vector& v, int_vector& z)
00092 {
00093     z.set_int_width(v.get_int_width());
00094     typename int_vector::size_type z_bit_size = 0;
00095     for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) {
00096         z_bit_size += encoding_length(*it);
00097     }
00098     z.bit_resize(z_bit_size);   // Initial size of z
00099     if (z_bit_size & 0x3F) { // if z_bit_size % 64 != 0
00100         *(z.m_data + (z_bit_size>>6)) = 0; // initialize last word
00101     }
00102     z_bit_size = 0;
00103     uint64_t* z_data = z.m_data, *z_temp;
00104     uint8_t offset=0, off_temp;
00105     uint64_t w=0;
00106     typename int_vector::size_type len=0;
00107     for (typename int_vector::const_iterator it = v.begin(), end=v.end(); it != end; ++it) {
00108         w = *it;
00109         if (w == 0) {
00110             throw std::logic_error("ternary::encode(const int_vector &v, int_vector &z); entry of v equals 0 that cannot be encoded!");
00111         }
00112         // (length of bits to represent w)
00113         len = encoding_length(*it)-2;
00114         if ((offset=(offset + len)) > 63) {
00115             z_data += (offset>>6);
00116         }
00117         offset &= 0x3F;
00118         z_temp = z_data;
00119         off_temp = offset;
00120         if ((offset=(offset+2)) > 63) {
00121             ++z_data;
00122         }
00123         offset &= 0x3F;
00124 //              bool not_zero = false;
00125         // write ternary number
00126         bit_magic::write_int(z_temp, 3, off_temp, 2);
00127         for (int32_t i=len; i>0; i-=2) {
00128             if (off_temp != 0)
00129                 off_temp -= 2;
00130             else {
00131                 off_temp = 62;
00132                 --z_temp;
00133             }
00134 //                      if( (w%3)>0 )
00135 //                              not_zero = true;
00136             bit_magic::write_int(z_temp, w%3, off_temp, 2);
00137             w/=3;
00138         }
00139 //              assert(not_zero);
00140     }
00141     return true;
00142 }
00143 
00144 template<class int_vector>
00145 bool ternary::decode(const int_vector& z, int_vector& v)
00146 {
00147     uint64_t n = 0; // n = number of values to be decoded
00148     const uint64_t* data = z.data();
00149     // Determine size of v
00150     if (z.empty()) {// if z is empty we are ready with decoding
00151         v.set_int_width(z.get_int_width());
00152         v.resize(0);
00153         return true;
00154     }
00155     for (typename int_vector::size_type i=0; i < (z.capacity()>>6)-1; ++i, ++data) {
00156         n += bit_magic::eB11Cnt(*data);
00157     }
00158     if (z.capacity() != z.bit_size()) {
00159         n += bit_magic::eB11Cnt((*data) & bit_magic::Li1Mask[z.bit_size()&0x3F]);
00160     } else {
00161         n += bit_magic::eB11Cnt(*data);
00162     }
00163     v.set_int_width(z.get_int_width()); v.resize(n);
00164     return decode<false, true>(z.data(), 0, n, v.begin());
00165 }
00166 
00167 template<bool sumup, bool increment, class Iterator>
00168 inline uint64_t ternary::decode(const uint64_t* data, const size_type start_idx, size_type n, Iterator it)
00169 {
00170     data += (start_idx >> 6);
00171     size_type i = 0;
00172     uint64_t w = 0, value = 0, tempv=0;
00173     uint32_t v;
00174     int8_t buffered = 0; // bits buffered in w, in 0..64
00175     int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
00176     int8_t shift = 0;
00177     while (i < n) {// while not all values are decoded
00178         while (buffered < 16 and i + bit_magic::eB11Cnt(w) < n) {
00179             w |= (((*data)>>read)<<buffered);
00180             if (read >= buffered) {
00181                 ++data;
00182                 buffered += 64-read;
00183                 read = 0;
00184             } else { // read < buffered
00185                 read += 64-buffered;
00186                 buffered = 64;
00187             }
00188         }
00189         v = Trny2bin_0_16[w&0xFFFF];
00190         //if shift is 16 and the most significant bit pair is not 11
00191         if (((v>>13)==7) && (w&0xC000)!=0xC000) {// not end of decoding
00192             tempv *= 6561; // tempv *= 3**8
00193             tempv += v&0x1FFF;
00194             w >>= 16;
00195             buffered -= 16;
00196         } else { // end of decoding
00197             shift = ((v>>13)+1)<<1;
00198             w >>= shift;
00199             buffered -= shift;
00200             tempv *= bit_magic::powerOf3[v>>13];
00201             tempv += v&0xFFF;
00202             if (sumup)
00203                 value += tempv;
00204             else
00205                 value = tempv;
00206             if (increment) *(it++) = value;
00207             tempv = 0;
00208             ++i;
00209         }
00210     }
00211     return value;
00212 }
00213 
00214 } // end namespace coder
00215 } // end namespace sdsl
00216 
00217 #endif