SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/enc_vector_dna.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2008 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #include "int_vector.hpp"
00022 #include "fibonacci_coder.hpp"
00023 #include "iterators.hpp"
00024 
00025 #ifndef SDSL_ENC_VECTOR_DNA
00026 #define SDSL_ENC_VECTOR_DNA
00027 
00029 namespace sdsl
00030 {
00031 
00032 template<uint8_t fixedIntWidth>
00033 struct enc_vector_dna_trait {
00034     typedef int_vector<0> int_vector_type;
00035 };
00036 
00037 template<>
00038 struct enc_vector_dna_trait<32> {
00039     typedef int_vector<32> int_vector_type;
00040 };
00041 
00043 
00049 template<uint32_t SampleDens = 8, uint8_t fixedIntWidth=0>
00050 class enc_vector_dna
00051 {
00052     public:
00053         typedef uint64_t                                                        value_type;     // STL Container requirement
00054         typedef random_access_const_iterator<enc_vector_dna> iterator;// STL Container requirement
00055         typedef iterator                                                        const_iterator; // STL Container requirement
00056         typedef const value_type                                        reference;
00057         typedef const value_type                                        const_reference;
00058         typedef const value_type*                                       const_pointer;
00059         typedef ptrdiff_t                                                       difference_type;// STL Container requirement
00060         typedef int_vector<>::size_type                         size_type;              // STL Container requirement
00061         typedef coder::fibonacci                                        coder;
00062         static  const uint32_t                                          sample_dens     = SampleDens;
00063 
00064         int_vector<0>   m_z;            // compressed bit stream
00065     private:
00066         typename enc_vector_dna_trait<fixedIntWidth>::int_vector_type   m_sample_vals_and_pointer;
00067         size_type               m_elements;    // number of elements
00068 
00069         // workaround function for the constructor
00070         void construct() {
00071             m_elements = 0;
00072         }
00073         void copy(const enc_vector_dna& v);
00074 
00075         void write_sumblock(uint64_t x, uint64_t blocknr, uint64_t*& sumblock, bool*& overflow) {
00076             if (!overflow[blocknr]) {
00077                 if (x > 255) {
00078                     overflow[blocknr] = true; sumblock[blocknr] = 0;
00079                 } else {
00080                     sumblock[blocknr] += x;
00081                     if (sumblock[blocknr]>255) {
00082                         overflow[blocknr] = true;
00083                         sumblock[blocknr] = 0;
00084                     }
00085                 }
00086             }
00087 
00088         }
00089 
00090     public:
00092         enc_vector_dna() {
00093             construct();
00094         };
00096 
00099         enc_vector_dna(const enc_vector_dna& v);
00100 
00102 
00106         template<class Container>
00107         enc_vector_dna(const Container& c) {
00108             construct();
00109             init(c);
00110         };
00111 
00112         template<class Container>
00113         void init(const Container& c);
00114 
00116         ~enc_vector_dna() {
00117         };
00118 
00120 
00124         size_type size()const;
00125 
00127 
00129         static size_type max_size();
00130 
00132 
00137         bool empty() const;
00138 
00140 
00148         void swap(enc_vector_dna& v);
00149 
00151 
00155         const const_iterator begin()const;
00156 
00158 
00162         const const_iterator end()const;
00163 
00164         // Iterator that points to the last element of the enc_vector_dna.
00165         /*
00166          *      Required for the Container Concept of the STL.
00167          *  \sa rend()
00168          */
00169 //              reverse_iterator rbegin()const;
00170 
00171         // Iterator that points to the position before the first element of the enc_vector_dna.
00172         /*
00173          *      Required for the Container Concept of the STL
00174          *  \sa rbegin()
00175          */
00176 //              reverse_iterator rend()const;
00177 
00179 
00183         value_type operator[](size_type i)const;
00184 
00186 
00189         enc_vector_dna& operator=(const enc_vector_dna& v);
00190 
00192 
00200         bool operator==(const enc_vector_dna& v)const;
00201 
00203 
00211         bool operator!=(const enc_vector_dna& v)const;
00212 
00214 
00217         size_type serialize(std::ostream& out) const;
00218 
00220         void load(std::istream& in);
00221 
00223 
00226         value_type sample(const size_type i) const;
00227 };
00228 
00229 
00230 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00231 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::sample(const size_type i)const
00232 {
00233     if (i+1 == 0 || i >= m_elements/SampleDens) {
00234         throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::sample[](size_type); i >= size()!");
00235         return 0;
00236     }
00237     return m_sample_vals_and_pointer[i<<1];
00238 }
00239 
00240 
00241 inline uint64_t decode64bit(uint64_t w)
00242 {
00243     uint64_t result = 0;
00244     while (w) {
00245         uint16_t temp = coder::fibonacci::Fib2bin_0_16_greedy[w&0xFFFF], shift;
00246         if ((shift=(temp>>11)) > 0) {
00247             result += (temp & 0x7FFULL);
00248             w >>= shift;
00249         } else {
00250             temp = 0;
00251             do {
00252                 result += coder::fibonacci::Fib2bin_0_95[(temp<<12) | (w&0xFFF)];
00253                 if ((shift =  coder::fibonacci::Fib2binShift[w&0x1FFF]) > 0) {
00254                     w >>= shift;
00255                     break;
00256                 } else {
00257                     w >>= 12;
00258                     temp++;
00259                 }
00260             } while (1);
00261         }
00262     }
00263     return result;
00264 }
00265 
00266 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00267 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::operator[](size_type i)const
00268 {
00269     if (i+1 == 0 || i >= m_elements) {
00270         throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::operator[](size_type); idx >= size()!");
00271         return 0;
00272     }
00273     const size_type idx  = i/SampleDens;
00274     i                           -= SampleDens*idx; // values to decode
00275     uint64_t result = m_sample_vals_and_pointer[idx<<1];
00276     if (!i) // if i==0
00277         return result;
00278 //      uint64_t expected = result + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i);
00279 
00280     uint8_t offset                      = m_sample_vals_and_pointer[(idx<<1)+1] & 0x3F;
00281     const uint64_t* data        = m_z.data() + (m_sample_vals_and_pointer[(idx<<1)+1] >> 6);
00282     uint64_t w                          = (*data) & ~bit_magic::Li1Mask[offset];
00283 //      std::cerr<<"0 w="<<w<<std::endl;
00284     bool carry = false;
00285     uint64_t m = bit_magic::all11BPs(w, carry); // calc end positions of fibonacci encoded numbers
00286     size_type cnt = bit_magic::b1Cnt(m); // count ends of fibonacci encoded numbers
00287 
00288     if (cnt >= i) {
00289         // decode i values in a block and return result
00290         w &= bit_magic::Li1Mask[bit_magic::i1BP(m,i)+1];
00291 //              assert( result + decode64bit(w>>offset) == expected );
00292         return result + decode64bit(w>>offset);
00293     } else {
00294         // 0 < cnt < i
00295 //              if(cnt==0) goto slow_decode;
00296         uint8_t last1pos = bit_magic::l1BP(m)+/*1*/(cnt>0),temp=0;
00297         w &= bit_magic::Li1Mask[last1pos];
00298         result += decode64bit(w>>offset);
00299 //      uint64_t expected1 = m_sample_vals_and_pointer[idx<<1] + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], cnt);
00300 //              assert(result == expected1);
00301 
00302         uint8_t  blocknr        = (data-m_z.data())%9;
00303         uint64_t sumblock       = (*(data + 8 - blocknr))>>(blocknr<<3);
00304         do {
00305             ++data; ++blocknr;
00306             sumblock >>= 8;
00307             if (blocknr==8) {
00308                 ++data;
00309                 sumblock=*(data+8);
00310                 blocknr=0;
00311             }
00312             m = bit_magic::all11BPs(*data,carry);
00313             cnt += (temp=bit_magic::b1Cnt(m));
00314             if (sumblock&0xFF) {
00315                 result += sumblock&0xFF;
00316 //                              if(cnt<=i){
00317 //      uint64_t expected2 = m_sample_vals_and_pointer[idx<<1] + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], cnt);
00318 //                              assert(result==expected2);
00319 //                              }
00320             } else
00321                 goto slow_decode;
00322         } while (cnt < i);
00323         if (cnt==i) {
00324 //                      assert(result == expected);
00325             return result;
00326         }
00327         offset = bit_magic::i1BP(m,temp+i-cnt)+1;
00328         w = (*data) >> offset;
00329         w &= bit_magic::Li1Mask[bit_magic::l1BP(m>>offset)+1];
00330 //              assert( result == expected );
00331         return result-decode64bit(w);
00332     }
00333 
00334 slow_decode:
00335 
00336     result = m_sample_vals_and_pointer[idx<<1];
00337     return result + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i);
00338 }
00339 /*
00340 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00341 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::operator[](const size_type i)const {
00342         if( i+1 == 0 || i >= m_elements  ){
00343                 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::operator[](size_type); idx >= size()!");
00344                 return 0;
00345         }
00346         size_type idx = i/SampleDens;
00347         size_type n   = i-SampleDens*idx; // values to decode
00348         uint64_t result = m_sample_vals_and_pointer[idx<<1];
00349         if(n==0)
00350                 return result;
00351 //if(n==69)
00352 //std::cout<<"n="<<n<<std::endl;
00353         // n > 0
00354         int16_t offset = m_sample_vals_and_pointer[(idx<<1)+1] & 0x3F;
00355         uint64_t w = m_sample_vals_and_pointer[(idx<<1)+1] >> 6;
00356         const uint64_t *data = m_z.data() + w;
00357         uint8_t blocknr = w%9;
00358 //std::cout<<"blocknr="<<(int)blocknr<<std::endl;
00359 
00360 //uint64_t checkbuf[1024] = {0};
00361 //checkbuf[0]= *data;
00362 //uint64_t checkres = m_sample_vals_and_pointer[idx<<1];
00363 //uint64_t *cb=checkbuf;
00364 //int16_t checkoff = offset;
00365 
00366         w = *data;
00367         bool carry = false;
00368         uint64_t m = bit_magic::all11BPs(w&~bit_magic::Li1Mask[offset], carry); // calc end positions of fibonacci encoded numbers
00369         size_type cnt = bit_magic::b1Cnt(m); // count ends of fibonacci encoded numbers
00370         if( cnt >= n  ){
00371 //std::cout<<"Case a: result so far = "<<result<<" cnt="<<cnt<<" n="<<n<<" offset="<<(int)offset<<std::endl;
00372 //std::cout<<"w="<<w<<std::endl;
00373 //std::cout<<"m="<<m<<std::endl;
00374                 // decode n values in a block and return result
00375                 w >>= offset;
00376                 m >>= offset;
00377                 uint16_t to_decode = bit_magic::i1BP(m,n)+1;
00378 //std::cout<<"to_decode = "<<to_decode<<std::endl;
00379                 w &= bit_magic::Li1Mask[to_decode];
00380 //std::cout<<"w="<<w<<std::endl;
00381                 // decode n values in a block and return result
00382 //std::cout<<"decoding "<<decode64bit(w)<<std::endl;
00383                 return result + decode64bit(w);
00384         }else{
00385 //std::cout<<"offset="<<(int)offset<<std::endl;
00386 //std::cout<<"cnt="<<cnt<<std::endl;
00387 //std::cout<<"w="<<w<<std::endl;
00388 //std::cout<<"m="<<m<<" w="<<w<<" cnt="<<cnt<<" offset="<<offset<<std::endl;
00389                 m >>= offset;
00390                 w >>= offset;
00391                 if(m){
00392                         result += decode64bit( w & bit_magic::Li1Mask[bit_magic::l1BP(m)+1] );
00393                 }
00394                 m <<= offset;
00395                 if(offset) m |= (1ULL<<(offset-1));
00396                 w <<= offset;
00397                 uint64_t sumblock = *(data + 8 - blocknr), oldm;
00398                 sumblock >>= (blocknr<<3);
00399                 uint16_t temp;
00400                 do{
00401                         ++data; ++blocknr;
00402                         sumblock >>= 8;
00403 //std::cout<<"sumblock="<<sumblock<<std::endl;
00404                         if(blocknr==8){sumblock=*data; ++data; blocknr=0;}
00405 // *(++cb)=*data;
00406                         oldm = m;
00407                         m    = bit_magic::all11BPs(*data, carry);
00408                         cnt += (temp=bit_magic::b1Cnt(m));
00409                         if( cnt < n ){
00410                                 if( (sumblock & 0xFF) ){// sum is precalculated
00411 //if(n==69)
00412 //std::cout<<"Case 1: sum = "<< (sumblock&0xFF) <<" result so far = "<<result<<" cnt="<<cnt<<" n="<<n<<" w="<<*data<<std::endl;
00413                                         result += (sumblock & 0xFF);
00414                                 }
00415                                 else{// sum is not precalculated
00416                                         offset = bit_magic::l1BP(oldm)+1; // find the leftmost position where an encoded value terminates in the previous word
00417 //if(n==69)
00418 //std::cout<<"Case 2: sum not precalculated "<<" cnt="<<cnt<<" n="<<n<<" offset="<<(int)offset<<std::endl;
00419                                         if(m == 0){// a big value ends in the next word => blocksum of next word = 0, and cnt < n
00420 //std::cout<<"Case 2a: m==0 "<<std::endl;
00421                                                 uint64_t buffer[3];
00422                                                 buffer[0] = *(data-1-(blocknr==0));  // get the last word
00423                                                 buffer[1] = *data;
00424 //                                              oldcarry = carry;
00425                                                 ++data; ++blocknr;
00426                                                 sumblock >>= 8;
00427                                                 if(blocknr==8){sumblock=*data; ++data; blocknr=0;}
00428 // *(++cb)=*data;
00429                                                 buffer[2] = *data;
00430                                                 oldm = m;
00431                                                 m    = bit_magic::all11BPs(*data, carry);
00432 //std::cout<<"Carry = "<<(int)carry<<std::endl;
00433 //std::cout<<buffer[0]<<std::endl;
00434 //std::cout<<buffer[1]<<std::endl;
00435 //std::cout<<buffer[2]<<std::endl;
00436 //std::cout<<"m="<<m<<std::endl;
00437                                                 cnt += (temp=bit_magic::b1Cnt(m));
00438 //std::cout<<"temp="<<(int)temp<<std::endl;
00439                                                 if(cnt >= n){
00440 //std::cout<<"decode = "<< coder::fibonacci::decode<true,false, int*>(buffer, offset, n + temp - cnt) << std::endl << "result = " << result << std::endl;
00441 //std::cout<<"decode check = "<< coder::fibonacci::decode<true,false, int*>(checkbuf, checkoff, n) << std::endl;
00442 //std::cout<<"decode check = "<< coder::fibonacci::decode<true,false, int*>(checkbuf, checkoff, n-1) << std::endl;
00443                                                         return result + coder::fibonacci::decode<true, false, int*>(buffer, offset, n + temp - cnt);
00444                                                 }else{
00445                                                         result += coder::fibonacci::decode<true, false, int*>(buffer, offset, temp);
00446                                                 }
00447                                         }else{
00448 //if(n==69)
00449 //std::cout<<"Case 2b: "<<std::endl;
00450                                                 w = *data;
00451                                                 uint16_t to_decode = bit_magic::l1BP(m)+1;
00452                                                 to_decode += (64-offset);
00453 //                                              std::cout<<"to_decode"<<(int)to_decode<<std::endl;
00454                                                 if( to_decode <= 64 ){
00455 //if(n==69)
00456 //std::cout<<"Case 2b1: "<<std::endl;
00457 //                                                      std::cout<<"w="<<w<<std::endl;
00458 //                                                      std::cout<<"m="<<m<<std::endl;
00459                                                         w <<= (64-offset);
00460 //                                                      std::cout<<"w="<<w<<std::endl;
00461                                                         w |= ((*(data-1-(blocknr==0)) >> offset) & bit_magic::Li1Mask[64-offset]);
00462 //                                                      std::cout<<"last w="<<(*(data-1-(blocknr==0)))<<std::endl;
00463                                                         w &= bit_magic::Li1Mask[to_decode];
00464 //                                                      std::cout<<"w="<<w<<std::endl;
00465                                                         result += decode64bit(w);
00466                                                 }else{
00467 //if(n==69)
00468 //std::cout<<"Case 2b2: "<<std::endl;
00469                                                         uint64_t buffer[2] = { *(data-1-(blocknr==0)), *data  };// falsch wenn buffer[0] das ganz erste wort ist
00470                                                         result += coder::fibonacci::decode<true, false, int*>(buffer, offset, temp);
00471                                                 }
00472                                         }
00473                                 }
00474                         }else{// cnt >= n, => w contains at least one 1 bit
00475 //if(n==69)
00476 //std::cout<<"Case 3:"<<std::endl;
00477                                 uint16_t to_decode = bit_magic::i1BP(m, temp)+1;
00478                                 w = *data & bit_magic::Li1Mask[to_decode];
00479                                 offset = bit_magic::l1BP(oldm)+1;
00480                                 to_decode += (64-offset);
00481                                 if( to_decode <= 64 ){
00482                                         w <<= (64-offset); // space for the encoded value from the previous word
00483                                         w |= ((*(data-1-(blocknr==0)) >> offset) & bit_magic::Li1Mask[64-offset]);
00484                                         return result + decode64bit(w);
00485                                 }else{
00486                                         uint64_t buffer[2] = { *(data-1-(blocknr==0)), *data  };
00487                                         return result + coder::fibonacci::decode<true, false, int*>(buffer, offset, temp);
00488                                 }
00489                         }
00490                 }while(1);
00491         }
00492         return result;
00493         //return m_sample_vals_and_pointer[idx<<1] + coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i-SampleDens*idx );
00494 }
00495 */
00496 
00497 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00498 inline enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::size()const
00499 {
00500     return m_elements;
00501 }
00502 
00503 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00504 inline enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::max_size()
00505 {
00506     return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes
00507 }
00508 
00509 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00510 inline bool enc_vector_dna<SampleDens,fixedIntWidth>::empty()const
00511 {
00512     return 0==m_elements;
00513 }
00514 
00515 
00516 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00517 void enc_vector_dna<SampleDens,fixedIntWidth>::copy(const enc_vector_dna<SampleDens,fixedIntWidth>& v)
00518 {
00519     m_z                                 = v.m_z;                                // copy compressed bit stream
00520     m_sample_vals_and_pointer           = v.m_sample_vals_and_pointer;      // copy sample values
00521     m_elements                  = v.m_elements;                 // copy number of stored elements
00522 }
00523 
00524 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00525 enc_vector_dna<SampleDens,fixedIntWidth>::enc_vector_dna(const enc_vector_dna& v)
00526 {
00527     copy(v);
00528 }
00529 
00530 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00531 enc_vector_dna<SampleDens,fixedIntWidth>& enc_vector_dna<SampleDens,fixedIntWidth>::operator=(const enc_vector_dna<SampleDens,fixedIntWidth>& v)
00532 {
00533     if (this != &v) {// if v and _this_ are not the same object
00534         copy(v);
00535     }
00536     return *this;
00537 }
00538 
00539 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00540 bool enc_vector_dna<SampleDens,fixedIntWidth>::operator==(const enc_vector_dna<SampleDens,fixedIntWidth>& v)const
00541 {
00542     if (this == &v)
00543         return true;
00544     return              m_elements == v.m_elements
00545                 and     m_z == v.m_z
00546                 and     m_sample_vals_and_pointer == v.m_sample_vals_and_pointer;
00547 }
00548 
00549 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00550 bool enc_vector_dna<SampleDens,fixedIntWidth>::operator!=(const enc_vector_dna<SampleDens,fixedIntWidth>& v)const
00551 {
00552     return !(*this == v);
00553 }
00554 
00555 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00556 void enc_vector_dna<SampleDens,fixedIntWidth>::swap(enc_vector_dna<SampleDens,fixedIntWidth>& v)
00557 {
00558     if (this != &v) { // if v and _this_ are not the same object
00559         m_z.swap(v.m_z);                                        // swap compressed bit streams
00560         m_sample_vals_and_pointer.swap(v.m_sample_vals_and_pointer);
00561         std::swap(m_elements, v.m_elements);// swap the number of elements
00562     }
00563 }
00564 
00565 
00566 
00567 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00568 template<class Container>
00569 void enc_vector_dna<SampleDens,fixedIntWidth>::init(const Container& c)
00570 {
00571     // clear BitVectors
00572     m_z.resize(0);
00573     m_elements = 0;
00574 //      m_inc_start.resize(0);
00575     m_sample_vals_and_pointer.resize(0);
00576     if (c.empty())  // if c is empty there is nothing to do...
00577         return;
00578     typename Container::const_iterator  it                      = c.begin(), end = c.end();
00579     typename Container::value_type              v1                      = *it, v2, max_value=0, max_sample_value=0, x;
00580     size_type samples=0;
00581     size_type z_size = 0, z_size_new=0, blocknr=0, newblocknr=0, writeblock=0;
00582     // invariant: blocknr != 8
00583     for (size_type i=0, no_sample=0; it != end; ++it,++i, --no_sample) {
00584         v2 = *it;
00585         if (!no_sample) { // add a sample
00586             no_sample = SampleDens;
00587             if (max_sample_value < v2) max_sample_value = v2;
00588             ++samples;
00589         } else {
00590             if (max_value < v2-v1) max_value = v2 - v1;
00591             if (v2 == v1) {
00592                 throw std::logic_error("enc_vector_dna cannot decode adjacent equal values!");
00593             }
00594             z_size_new = z_size + coder::encoding_length(v2-v1);
00595             newblocknr = (z_size_new/64)%9;
00596             if (newblocknr == 8 or (newblocknr==0 and blocknr==7)) {  //if we reach or pass the sumblock
00597                 z_size_new += 64; // add the bits for the sumblock
00598             }
00599             z_size = z_size_new;
00600             blocknr = (z_size/64)%9;
00601         }
00602         v1=v2;
00603     }
00604     if (blocknr==0 and (z_size%64)==0) {
00605         //no additional bits to add
00606     } else {
00607         assert(blocknr!=8);
00608         z_size += (8-blocknr)*64;
00609         blocknr = (z_size/64)%9;
00610         assert(blocknr==8);
00611         z_size += (64-(z_size%64)); // fill last bits
00612         blocknr = (z_size/64)%9;
00613         assert(blocknr==0 and (z_size%64)==0);
00614     }
00615 
00616 //std::cerr<<"Calculate delta"<<std::endl;
00617     {
00618 //              int_vector<> delta_c( c.size()-samples, 0, sizeof(typename Container::value_type)*8 ); // Vector for difference encoding of c
00619         if (max_sample_value > z_size+1)
00620             m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(max_sample_value) + 1);
00621         else
00622             m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1);
00623         m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry
00624 //              int_vector<0>::iterator d_it = delta_c.begin();
00625 //              int_vector<0>::iterator sv_it = m_sample_vals_and_pointer.begin();
00626         typename enc_vector_dna_trait<fixedIntWidth>::int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin();
00627         z_size = 0;
00628         size_type no_sample=0;
00629         blocknr = 0; newblocknr = 0;
00630         for (it = c.begin(); it != end; ++it, --no_sample) {
00631             v2 = *it;
00632             if (!no_sample) { // add a sample
00633                 no_sample = SampleDens;
00634                 *sv_it = v2; ++sv_it;
00635                 *sv_it = z_size; ++sv_it;
00636             } else {
00637                 z_size_new = z_size + coder::encoding_length(v2-v1);
00638                 newblocknr = (z_size_new/64)%9;
00639                 if (newblocknr == 8 or (newblocknr==0 and blocknr==7)) {
00640                     z_size_new+=64;
00641                 }
00642                 z_size = z_size_new;
00643                 blocknr = (z_size/64)%9;
00644             }
00645             v1=v2;
00646         }
00647         *sv_it = 0; ++sv_it;        // initialize
00648         *sv_it = z_size+1; ++sv_it; // last entry
00649 
00650         m_z.bit_resize(z_size);
00651         z_size = 0;
00652         uint64_t* z_data = coder::raw_data(m_z);
00653         uint8_t offset = 0;
00654         no_sample = 0;
00655         uint64_t sumblock[8]= {0};
00656         uint64_t* sbp=sumblock;
00657         bool overflow[8]= {false,false,false,false,false,false,false,false};
00658         bool* ovp = overflow;
00659         blocknr = 0; newblocknr = 0, writeblock = 0;
00660         for (it = c.begin(); it != end; ++it, --no_sample) {
00661             v2 = *it;
00662             if (!no_sample) { // add a sample
00663                 no_sample = SampleDens;
00664             } else { // invariant: (z_data/64)%9!=8
00665                 x = v2 - v1;
00666                 coder::encode(x, z_data, offset);
00667                 z_size_new = z_size + coder::encoding_length(x);
00668                 newblocknr = (z_size_new/64)%9;
00669                 writeblock = ((z_size_new-1)/64)%9;
00670 
00671                 if (newblocknr == 8) { // if the next integer will be written in the sumblock
00672                     if ((z_size_new%64) > 0) {// if we have already written bits in the sumblock
00673                         *(z_data+1) = *z_data; // copy these bits in the next word
00674                     }
00675                     uint64_t sumblockword=0;
00676                     // calculate sumblock
00677                     if (writeblock == 7) {
00678                         write_sumblock(x, writeblock, sbp, ovp);
00679                         x=0;
00680                     }
00681                     for (int i=7; i>=0; --i) {
00682                         sumblockword<<=8;
00683                         sumblockword+=sumblock[i];
00684                         sumblock[i]=0;
00685                         overflow[i]=false;
00686                     }
00687                     *z_data = sumblockword;
00688                     ++z_data;
00689                     z_size_new += 64;//add 64 because we have added the sumblock
00690                 } else if (newblocknr==0 and blocknr==7) {//
00691                     if ((z_size_new%64) > 0) {
00692                         *(z_data+1) = *z_data; // copy bit from block 0 to block 1
00693                     }
00694                     *z_data = *(z_data-1);      // copy the content of the sumblock to block 0
00695                     uint64_t sumblockword=0;
00696                     // calculate sumblock
00697                     for (int i=7; i>=0; --i) {
00698                         sumblockword<<=8;
00699                         sumblockword+=sumblock[i];
00700                         sumblock[i]=0;
00701                         overflow[i]=false;
00702                     }
00703                     *(z_data-1) = sumblockword;
00704                     ++z_data;
00705                     z_size_new += 64;// add 64 because we have added the sumblock
00706                 }
00707                 z_size = z_size_new;
00708                 blocknr = (z_size/64)%9;
00709                 writeblock = ((z_size-1)/64)%9;
00710 
00711                 // sumblock[blocknr] contains the sum of fibonacci encoded words that ends in this block
00712                 // or 0 if this sum is greater than 255.
00713                 write_sumblock(x, writeblock, sbp, ovp);
00714 //                              if(!overflow[blocknr]){
00715 //                                      if( x > 255 ){
00716 //                                              overflow[blocknr] = true; sumblock[blocknr] = 0;
00717 //                                      }else{
00718 //                                              sumblock[blocknr] += x;
00719 //                                              if(sumblock[blocknr]>255){ overflow[blocknr] = true; sumblock[blocknr] = 0; }
00720 //                                      }
00721 //                              }
00722             }
00723             v1=v2;
00724         }
00725         if (blocknr==0 and (z_size%64)==0) {
00726             //no additional bits to add
00727         } else { // write last sumblock
00728             assert(blocknr!=8);
00729             uint64_t sumblockword=0;
00730             // calculate sumblock
00731             for (int i=7; i>=0; --i) {
00732                 sumblockword<<=8;
00733                 sumblockword+=sumblock[i];
00734                 sumblock[i]=0;
00735                 overflow[i]=false;
00736             }
00737             z_data += (8-blocknr);
00738             *z_data = sumblockword;
00739         }
00740     }
00741 //      delta_c.resize(0);
00742 //std::cerr<<"Calc rank"<<std::endl;
00743 //std::cerr<<"Calc select"<<std::endl;
00744 //std::cerr<<"Finished "<<std::endl;,
00745     m_elements = c.size();
00746 }
00747 
00748 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00749 enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::serialize(std::ostream& out) const
00750 {
00751     size_type written_bytes = 0;
00752     out.write((char*) &m_elements, sizeof(m_elements));
00753     written_bytes += sizeof(m_elements);
00754     written_bytes += m_z.serialize(out);
00755     written_bytes += m_sample_vals_and_pointer.serialize(out);
00756     return written_bytes;
00757 }
00758 
00759 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00760 void enc_vector_dna<SampleDens,fixedIntWidth>::load(std::istream& in)
00761 {
00762     in.read((char*) &m_elements, sizeof(m_elements));
00763     m_z.load(in);
00764     m_sample_vals_and_pointer.load(in);
00765 }
00766 
00767 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00768 const typename enc_vector_dna<SampleDens,fixedIntWidth>::const_iterator enc_vector_dna<SampleDens,fixedIntWidth>::begin()const
00769 {
00770     return const_iterator(this, 0);
00771 }
00772 
00773 template<uint32_t SampleDens, uint8_t fixedIntWidth>
00774 const typename enc_vector_dna<SampleDens,fixedIntWidth>::const_iterator enc_vector_dna<SampleDens,fixedIntWidth>::end()const
00775 {
00776     return const_iterator(this, this->m_elements);
00777 }
00778 
00779 
00780 } // end namespace sdsl
00781 
00782 #endif