SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #include "int_vector.hpp" 00022 #include "fibonacci_coder.hpp" 00023 #include "iterators.hpp" 00024 00025 #ifndef SDSL_ENC_VECTOR_DNA 00026 #define SDSL_ENC_VECTOR_DNA 00027 00029 namespace sdsl 00030 { 00031 00032 template<uint8_t fixedIntWidth> 00033 struct enc_vector_dna_trait { 00034 typedef int_vector<0> int_vector_type; 00035 }; 00036 00037 template<> 00038 struct enc_vector_dna_trait<32> { 00039 typedef int_vector<32> int_vector_type; 00040 }; 00041 00043 00049 template<uint32_t SampleDens = 8, uint8_t fixedIntWidth=0> 00050 class enc_vector_dna 00051 { 00052 public: 00053 typedef uint64_t value_type; // STL Container requirement 00054 typedef random_access_const_iterator<enc_vector_dna> iterator;// STL Container requirement 00055 typedef iterator const_iterator; // STL Container requirement 00056 typedef const value_type reference; 00057 typedef const value_type const_reference; 00058 typedef const value_type* const_pointer; 00059 typedef ptrdiff_t difference_type;// STL Container requirement 00060 typedef int_vector<>::size_type size_type; // STL Container requirement 00061 typedef coder::fibonacci coder; 00062 static const uint32_t sample_dens = SampleDens; 00063 00064 int_vector<0> m_z; // compressed bit stream 00065 private: 00066 typename enc_vector_dna_trait<fixedIntWidth>::int_vector_type m_sample_vals_and_pointer; 00067 size_type m_elements; // number of elements 00068 00069 // workaround function for the constructor 00070 void construct() { 00071 m_elements = 0; 00072 } 00073 void copy(const enc_vector_dna& v); 00074 00075 void write_sumblock(uint64_t x, uint64_t blocknr, uint64_t*& sumblock, bool*& overflow) { 00076 if (!overflow[blocknr]) { 00077 if (x > 255) { 00078 overflow[blocknr] = true; sumblock[blocknr] = 0; 00079 } else { 00080 sumblock[blocknr] += x; 00081 if (sumblock[blocknr]>255) { 00082 overflow[blocknr] = true; 00083 sumblock[blocknr] = 0; 00084 } 00085 } 00086 } 00087 00088 } 00089 00090 public: 00092 enc_vector_dna() { 00093 construct(); 00094 }; 00096 00099 enc_vector_dna(const enc_vector_dna& v); 00100 00102 00106 template<class Container> 00107 enc_vector_dna(const Container& c) { 00108 construct(); 00109 init(c); 00110 }; 00111 00112 template<class Container> 00113 void init(const Container& c); 00114 00116 ~enc_vector_dna() { 00117 }; 00118 00120 00124 size_type size()const; 00125 00127 00129 static size_type max_size(); 00130 00132 00137 bool empty() const; 00138 00140 00148 void swap(enc_vector_dna& v); 00149 00151 00155 const const_iterator begin()const; 00156 00158 00162 const const_iterator end()const; 00163 00164 // Iterator that points to the last element of the enc_vector_dna. 00165 /* 00166 * Required for the Container Concept of the STL. 00167 * \sa rend() 00168 */ 00169 // reverse_iterator rbegin()const; 00170 00171 // Iterator that points to the position before the first element of the enc_vector_dna. 00172 /* 00173 * Required for the Container Concept of the STL 00174 * \sa rbegin() 00175 */ 00176 // reverse_iterator rend()const; 00177 00179 00183 value_type operator[](size_type i)const; 00184 00186 00189 enc_vector_dna& operator=(const enc_vector_dna& v); 00190 00192 00200 bool operator==(const enc_vector_dna& v)const; 00201 00203 00211 bool operator!=(const enc_vector_dna& v)const; 00212 00214 00217 size_type serialize(std::ostream& out) const; 00218 00220 void load(std::istream& in); 00221 00223 00226 value_type sample(const size_type i) const; 00227 }; 00228 00229 00230 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00231 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::sample(const size_type i)const 00232 { 00233 if (i+1 == 0 || i >= m_elements/SampleDens) { 00234 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::sample[](size_type); i >= size()!"); 00235 return 0; 00236 } 00237 return m_sample_vals_and_pointer[i<<1]; 00238 } 00239 00240 00241 inline uint64_t decode64bit(uint64_t w) 00242 { 00243 uint64_t result = 0; 00244 while (w) { 00245 uint16_t temp = coder::fibonacci::Fib2bin_0_16_greedy[w&0xFFFF], shift; 00246 if ((shift=(temp>>11)) > 0) { 00247 result += (temp & 0x7FFULL); 00248 w >>= shift; 00249 } else { 00250 temp = 0; 00251 do { 00252 result += coder::fibonacci::Fib2bin_0_95[(temp<<12) | (w&0xFFF)]; 00253 if ((shift = coder::fibonacci::Fib2binShift[w&0x1FFF]) > 0) { 00254 w >>= shift; 00255 break; 00256 } else { 00257 w >>= 12; 00258 temp++; 00259 } 00260 } while (1); 00261 } 00262 } 00263 return result; 00264 } 00265 00266 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00267 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::operator[](size_type i)const 00268 { 00269 if (i+1 == 0 || i >= m_elements) { 00270 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::operator[](size_type); idx >= size()!"); 00271 return 0; 00272 } 00273 const size_type idx = i/SampleDens; 00274 i -= SampleDens*idx; // values to decode 00275 uint64_t result = m_sample_vals_and_pointer[idx<<1]; 00276 if (!i) // if i==0 00277 return result; 00278 // uint64_t expected = result + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i); 00279 00280 uint8_t offset = m_sample_vals_and_pointer[(idx<<1)+1] & 0x3F; 00281 const uint64_t* data = m_z.data() + (m_sample_vals_and_pointer[(idx<<1)+1] >> 6); 00282 uint64_t w = (*data) & ~bit_magic::Li1Mask[offset]; 00283 // std::cerr<<"0 w="<<w<<std::endl; 00284 bool carry = false; 00285 uint64_t m = bit_magic::all11BPs(w, carry); // calc end positions of fibonacci encoded numbers 00286 size_type cnt = bit_magic::b1Cnt(m); // count ends of fibonacci encoded numbers 00287 00288 if (cnt >= i) { 00289 // decode i values in a block and return result 00290 w &= bit_magic::Li1Mask[bit_magic::i1BP(m,i)+1]; 00291 // assert( result + decode64bit(w>>offset) == expected ); 00292 return result + decode64bit(w>>offset); 00293 } else { 00294 // 0 < cnt < i 00295 // if(cnt==0) goto slow_decode; 00296 uint8_t last1pos = bit_magic::l1BP(m)+/*1*/(cnt>0),temp=0; 00297 w &= bit_magic::Li1Mask[last1pos]; 00298 result += decode64bit(w>>offset); 00299 // uint64_t expected1 = m_sample_vals_and_pointer[idx<<1] + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], cnt); 00300 // assert(result == expected1); 00301 00302 uint8_t blocknr = (data-m_z.data())%9; 00303 uint64_t sumblock = (*(data + 8 - blocknr))>>(blocknr<<3); 00304 do { 00305 ++data; ++blocknr; 00306 sumblock >>= 8; 00307 if (blocknr==8) { 00308 ++data; 00309 sumblock=*(data+8); 00310 blocknr=0; 00311 } 00312 m = bit_magic::all11BPs(*data,carry); 00313 cnt += (temp=bit_magic::b1Cnt(m)); 00314 if (sumblock&0xFF) { 00315 result += sumblock&0xFF; 00316 // if(cnt<=i){ 00317 // uint64_t expected2 = m_sample_vals_and_pointer[idx<<1] + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], cnt); 00318 // assert(result==expected2); 00319 // } 00320 } else 00321 goto slow_decode; 00322 } while (cnt < i); 00323 if (cnt==i) { 00324 // assert(result == expected); 00325 return result; 00326 } 00327 offset = bit_magic::i1BP(m,temp+i-cnt)+1; 00328 w = (*data) >> offset; 00329 w &= bit_magic::Li1Mask[bit_magic::l1BP(m>>offset)+1]; 00330 // assert( result == expected ); 00331 return result-decode64bit(w); 00332 } 00333 00334 slow_decode: 00335 00336 result = m_sample_vals_and_pointer[idx<<1]; 00337 return result + coder::fibonacci::decode1<true, false, int*>(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i); 00338 } 00339 /* 00340 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00341 inline typename enc_vector_dna<SampleDens,fixedIntWidth>::value_type enc_vector_dna<SampleDens,fixedIntWidth>::operator[](const size_type i)const { 00342 if( i+1 == 0 || i >= m_elements ){ 00343 throw std::out_of_range("OUT_OF_RANGE_ERROR: enc_vector_dna::operator[](size_type); idx >= size()!"); 00344 return 0; 00345 } 00346 size_type idx = i/SampleDens; 00347 size_type n = i-SampleDens*idx; // values to decode 00348 uint64_t result = m_sample_vals_and_pointer[idx<<1]; 00349 if(n==0) 00350 return result; 00351 //if(n==69) 00352 //std::cout<<"n="<<n<<std::endl; 00353 // n > 0 00354 int16_t offset = m_sample_vals_and_pointer[(idx<<1)+1] & 0x3F; 00355 uint64_t w = m_sample_vals_and_pointer[(idx<<1)+1] >> 6; 00356 const uint64_t *data = m_z.data() + w; 00357 uint8_t blocknr = w%9; 00358 //std::cout<<"blocknr="<<(int)blocknr<<std::endl; 00359 00360 //uint64_t checkbuf[1024] = {0}; 00361 //checkbuf[0]= *data; 00362 //uint64_t checkres = m_sample_vals_and_pointer[idx<<1]; 00363 //uint64_t *cb=checkbuf; 00364 //int16_t checkoff = offset; 00365 00366 w = *data; 00367 bool carry = false; 00368 uint64_t m = bit_magic::all11BPs(w&~bit_magic::Li1Mask[offset], carry); // calc end positions of fibonacci encoded numbers 00369 size_type cnt = bit_magic::b1Cnt(m); // count ends of fibonacci encoded numbers 00370 if( cnt >= n ){ 00371 //std::cout<<"Case a: result so far = "<<result<<" cnt="<<cnt<<" n="<<n<<" offset="<<(int)offset<<std::endl; 00372 //std::cout<<"w="<<w<<std::endl; 00373 //std::cout<<"m="<<m<<std::endl; 00374 // decode n values in a block and return result 00375 w >>= offset; 00376 m >>= offset; 00377 uint16_t to_decode = bit_magic::i1BP(m,n)+1; 00378 //std::cout<<"to_decode = "<<to_decode<<std::endl; 00379 w &= bit_magic::Li1Mask[to_decode]; 00380 //std::cout<<"w="<<w<<std::endl; 00381 // decode n values in a block and return result 00382 //std::cout<<"decoding "<<decode64bit(w)<<std::endl; 00383 return result + decode64bit(w); 00384 }else{ 00385 //std::cout<<"offset="<<(int)offset<<std::endl; 00386 //std::cout<<"cnt="<<cnt<<std::endl; 00387 //std::cout<<"w="<<w<<std::endl; 00388 //std::cout<<"m="<<m<<" w="<<w<<" cnt="<<cnt<<" offset="<<offset<<std::endl; 00389 m >>= offset; 00390 w >>= offset; 00391 if(m){ 00392 result += decode64bit( w & bit_magic::Li1Mask[bit_magic::l1BP(m)+1] ); 00393 } 00394 m <<= offset; 00395 if(offset) m |= (1ULL<<(offset-1)); 00396 w <<= offset; 00397 uint64_t sumblock = *(data + 8 - blocknr), oldm; 00398 sumblock >>= (blocknr<<3); 00399 uint16_t temp; 00400 do{ 00401 ++data; ++blocknr; 00402 sumblock >>= 8; 00403 //std::cout<<"sumblock="<<sumblock<<std::endl; 00404 if(blocknr==8){sumblock=*data; ++data; blocknr=0;} 00405 // *(++cb)=*data; 00406 oldm = m; 00407 m = bit_magic::all11BPs(*data, carry); 00408 cnt += (temp=bit_magic::b1Cnt(m)); 00409 if( cnt < n ){ 00410 if( (sumblock & 0xFF) ){// sum is precalculated 00411 //if(n==69) 00412 //std::cout<<"Case 1: sum = "<< (sumblock&0xFF) <<" result so far = "<<result<<" cnt="<<cnt<<" n="<<n<<" w="<<*data<<std::endl; 00413 result += (sumblock & 0xFF); 00414 } 00415 else{// sum is not precalculated 00416 offset = bit_magic::l1BP(oldm)+1; // find the leftmost position where an encoded value terminates in the previous word 00417 //if(n==69) 00418 //std::cout<<"Case 2: sum not precalculated "<<" cnt="<<cnt<<" n="<<n<<" offset="<<(int)offset<<std::endl; 00419 if(m == 0){// a big value ends in the next word => blocksum of next word = 0, and cnt < n 00420 //std::cout<<"Case 2a: m==0 "<<std::endl; 00421 uint64_t buffer[3]; 00422 buffer[0] = *(data-1-(blocknr==0)); // get the last word 00423 buffer[1] = *data; 00424 // oldcarry = carry; 00425 ++data; ++blocknr; 00426 sumblock >>= 8; 00427 if(blocknr==8){sumblock=*data; ++data; blocknr=0;} 00428 // *(++cb)=*data; 00429 buffer[2] = *data; 00430 oldm = m; 00431 m = bit_magic::all11BPs(*data, carry); 00432 //std::cout<<"Carry = "<<(int)carry<<std::endl; 00433 //std::cout<<buffer[0]<<std::endl; 00434 //std::cout<<buffer[1]<<std::endl; 00435 //std::cout<<buffer[2]<<std::endl; 00436 //std::cout<<"m="<<m<<std::endl; 00437 cnt += (temp=bit_magic::b1Cnt(m)); 00438 //std::cout<<"temp="<<(int)temp<<std::endl; 00439 if(cnt >= n){ 00440 //std::cout<<"decode = "<< coder::fibonacci::decode<true,false, int*>(buffer, offset, n + temp - cnt) << std::endl << "result = " << result << std::endl; 00441 //std::cout<<"decode check = "<< coder::fibonacci::decode<true,false, int*>(checkbuf, checkoff, n) << std::endl; 00442 //std::cout<<"decode check = "<< coder::fibonacci::decode<true,false, int*>(checkbuf, checkoff, n-1) << std::endl; 00443 return result + coder::fibonacci::decode<true, false, int*>(buffer, offset, n + temp - cnt); 00444 }else{ 00445 result += coder::fibonacci::decode<true, false, int*>(buffer, offset, temp); 00446 } 00447 }else{ 00448 //if(n==69) 00449 //std::cout<<"Case 2b: "<<std::endl; 00450 w = *data; 00451 uint16_t to_decode = bit_magic::l1BP(m)+1; 00452 to_decode += (64-offset); 00453 // std::cout<<"to_decode"<<(int)to_decode<<std::endl; 00454 if( to_decode <= 64 ){ 00455 //if(n==69) 00456 //std::cout<<"Case 2b1: "<<std::endl; 00457 // std::cout<<"w="<<w<<std::endl; 00458 // std::cout<<"m="<<m<<std::endl; 00459 w <<= (64-offset); 00460 // std::cout<<"w="<<w<<std::endl; 00461 w |= ((*(data-1-(blocknr==0)) >> offset) & bit_magic::Li1Mask[64-offset]); 00462 // std::cout<<"last w="<<(*(data-1-(blocknr==0)))<<std::endl; 00463 w &= bit_magic::Li1Mask[to_decode]; 00464 // std::cout<<"w="<<w<<std::endl; 00465 result += decode64bit(w); 00466 }else{ 00467 //if(n==69) 00468 //std::cout<<"Case 2b2: "<<std::endl; 00469 uint64_t buffer[2] = { *(data-1-(blocknr==0)), *data };// falsch wenn buffer[0] das ganz erste wort ist 00470 result += coder::fibonacci::decode<true, false, int*>(buffer, offset, temp); 00471 } 00472 } 00473 } 00474 }else{// cnt >= n, => w contains at least one 1 bit 00475 //if(n==69) 00476 //std::cout<<"Case 3:"<<std::endl; 00477 uint16_t to_decode = bit_magic::i1BP(m, temp)+1; 00478 w = *data & bit_magic::Li1Mask[to_decode]; 00479 offset = bit_magic::l1BP(oldm)+1; 00480 to_decode += (64-offset); 00481 if( to_decode <= 64 ){ 00482 w <<= (64-offset); // space for the encoded value from the previous word 00483 w |= ((*(data-1-(blocknr==0)) >> offset) & bit_magic::Li1Mask[64-offset]); 00484 return result + decode64bit(w); 00485 }else{ 00486 uint64_t buffer[2] = { *(data-1-(blocknr==0)), *data }; 00487 return result + coder::fibonacci::decode<true, false, int*>(buffer, offset, temp); 00488 } 00489 } 00490 }while(1); 00491 } 00492 return result; 00493 //return m_sample_vals_and_pointer[idx<<1] + coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx<<1)+1], i-SampleDens*idx ); 00494 } 00495 */ 00496 00497 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00498 inline enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::size()const 00499 { 00500 return m_elements; 00501 } 00502 00503 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00504 inline enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::max_size() 00505 { 00506 return int_vector<>::max_size()/2; // each element could possible occupy double space with selfdelimiting codes 00507 } 00508 00509 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00510 inline bool enc_vector_dna<SampleDens,fixedIntWidth>::empty()const 00511 { 00512 return 0==m_elements; 00513 } 00514 00515 00516 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00517 void enc_vector_dna<SampleDens,fixedIntWidth>::copy(const enc_vector_dna<SampleDens,fixedIntWidth>& v) 00518 { 00519 m_z = v.m_z; // copy compressed bit stream 00520 m_sample_vals_and_pointer = v.m_sample_vals_and_pointer; // copy sample values 00521 m_elements = v.m_elements; // copy number of stored elements 00522 } 00523 00524 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00525 enc_vector_dna<SampleDens,fixedIntWidth>::enc_vector_dna(const enc_vector_dna& v) 00526 { 00527 copy(v); 00528 } 00529 00530 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00531 enc_vector_dna<SampleDens,fixedIntWidth>& enc_vector_dna<SampleDens,fixedIntWidth>::operator=(const enc_vector_dna<SampleDens,fixedIntWidth>& v) 00532 { 00533 if (this != &v) {// if v and _this_ are not the same object 00534 copy(v); 00535 } 00536 return *this; 00537 } 00538 00539 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00540 bool enc_vector_dna<SampleDens,fixedIntWidth>::operator==(const enc_vector_dna<SampleDens,fixedIntWidth>& v)const 00541 { 00542 if (this == &v) 00543 return true; 00544 return m_elements == v.m_elements 00545 and m_z == v.m_z 00546 and m_sample_vals_and_pointer == v.m_sample_vals_and_pointer; 00547 } 00548 00549 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00550 bool enc_vector_dna<SampleDens,fixedIntWidth>::operator!=(const enc_vector_dna<SampleDens,fixedIntWidth>& v)const 00551 { 00552 return !(*this == v); 00553 } 00554 00555 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00556 void enc_vector_dna<SampleDens,fixedIntWidth>::swap(enc_vector_dna<SampleDens,fixedIntWidth>& v) 00557 { 00558 if (this != &v) { // if v and _this_ are not the same object 00559 m_z.swap(v.m_z); // swap compressed bit streams 00560 m_sample_vals_and_pointer.swap(v.m_sample_vals_and_pointer); 00561 std::swap(m_elements, v.m_elements);// swap the number of elements 00562 } 00563 } 00564 00565 00566 00567 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00568 template<class Container> 00569 void enc_vector_dna<SampleDens,fixedIntWidth>::init(const Container& c) 00570 { 00571 // clear BitVectors 00572 m_z.resize(0); 00573 m_elements = 0; 00574 // m_inc_start.resize(0); 00575 m_sample_vals_and_pointer.resize(0); 00576 if (c.empty()) // if c is empty there is nothing to do... 00577 return; 00578 typename Container::const_iterator it = c.begin(), end = c.end(); 00579 typename Container::value_type v1 = *it, v2, max_value=0, max_sample_value=0, x; 00580 size_type samples=0; 00581 size_type z_size = 0, z_size_new=0, blocknr=0, newblocknr=0, writeblock=0; 00582 // invariant: blocknr != 8 00583 for (size_type i=0, no_sample=0; it != end; ++it,++i, --no_sample) { 00584 v2 = *it; 00585 if (!no_sample) { // add a sample 00586 no_sample = SampleDens; 00587 if (max_sample_value < v2) max_sample_value = v2; 00588 ++samples; 00589 } else { 00590 if (max_value < v2-v1) max_value = v2 - v1; 00591 if (v2 == v1) { 00592 throw std::logic_error("enc_vector_dna cannot decode adjacent equal values!"); 00593 } 00594 z_size_new = z_size + coder::encoding_length(v2-v1); 00595 newblocknr = (z_size_new/64)%9; 00596 if (newblocknr == 8 or (newblocknr==0 and blocknr==7)) { //if we reach or pass the sumblock 00597 z_size_new += 64; // add the bits for the sumblock 00598 } 00599 z_size = z_size_new; 00600 blocknr = (z_size/64)%9; 00601 } 00602 v1=v2; 00603 } 00604 if (blocknr==0 and (z_size%64)==0) { 00605 //no additional bits to add 00606 } else { 00607 assert(blocknr!=8); 00608 z_size += (8-blocknr)*64; 00609 blocknr = (z_size/64)%9; 00610 assert(blocknr==8); 00611 z_size += (64-(z_size%64)); // fill last bits 00612 blocknr = (z_size/64)%9; 00613 assert(blocknr==0 and (z_size%64)==0); 00614 } 00615 00616 //std::cerr<<"Calculate delta"<<std::endl; 00617 { 00618 // int_vector<> delta_c( c.size()-samples, 0, sizeof(typename Container::value_type)*8 ); // Vector for difference encoding of c 00619 if (max_sample_value > z_size+1) 00620 m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(max_sample_value) + 1); 00621 else 00622 m_sample_vals_and_pointer.set_int_width(bit_magic::l1BP(z_size+1) + 1); 00623 m_sample_vals_and_pointer.resize(2*samples+2); // add 2 for last entry 00624 // int_vector<0>::iterator d_it = delta_c.begin(); 00625 // int_vector<0>::iterator sv_it = m_sample_vals_and_pointer.begin(); 00626 typename enc_vector_dna_trait<fixedIntWidth>::int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin(); 00627 z_size = 0; 00628 size_type no_sample=0; 00629 blocknr = 0; newblocknr = 0; 00630 for (it = c.begin(); it != end; ++it, --no_sample) { 00631 v2 = *it; 00632 if (!no_sample) { // add a sample 00633 no_sample = SampleDens; 00634 *sv_it = v2; ++sv_it; 00635 *sv_it = z_size; ++sv_it; 00636 } else { 00637 z_size_new = z_size + coder::encoding_length(v2-v1); 00638 newblocknr = (z_size_new/64)%9; 00639 if (newblocknr == 8 or (newblocknr==0 and blocknr==7)) { 00640 z_size_new+=64; 00641 } 00642 z_size = z_size_new; 00643 blocknr = (z_size/64)%9; 00644 } 00645 v1=v2; 00646 } 00647 *sv_it = 0; ++sv_it; // initialize 00648 *sv_it = z_size+1; ++sv_it; // last entry 00649 00650 m_z.bit_resize(z_size); 00651 z_size = 0; 00652 uint64_t* z_data = coder::raw_data(m_z); 00653 uint8_t offset = 0; 00654 no_sample = 0; 00655 uint64_t sumblock[8]= {0}; 00656 uint64_t* sbp=sumblock; 00657 bool overflow[8]= {false,false,false,false,false,false,false,false}; 00658 bool* ovp = overflow; 00659 blocknr = 0; newblocknr = 0, writeblock = 0; 00660 for (it = c.begin(); it != end; ++it, --no_sample) { 00661 v2 = *it; 00662 if (!no_sample) { // add a sample 00663 no_sample = SampleDens; 00664 } else { // invariant: (z_data/64)%9!=8 00665 x = v2 - v1; 00666 coder::encode(x, z_data, offset); 00667 z_size_new = z_size + coder::encoding_length(x); 00668 newblocknr = (z_size_new/64)%9; 00669 writeblock = ((z_size_new-1)/64)%9; 00670 00671 if (newblocknr == 8) { // if the next integer will be written in the sumblock 00672 if ((z_size_new%64) > 0) {// if we have already written bits in the sumblock 00673 *(z_data+1) = *z_data; // copy these bits in the next word 00674 } 00675 uint64_t sumblockword=0; 00676 // calculate sumblock 00677 if (writeblock == 7) { 00678 write_sumblock(x, writeblock, sbp, ovp); 00679 x=0; 00680 } 00681 for (int i=7; i>=0; --i) { 00682 sumblockword<<=8; 00683 sumblockword+=sumblock[i]; 00684 sumblock[i]=0; 00685 overflow[i]=false; 00686 } 00687 *z_data = sumblockword; 00688 ++z_data; 00689 z_size_new += 64;//add 64 because we have added the sumblock 00690 } else if (newblocknr==0 and blocknr==7) {// 00691 if ((z_size_new%64) > 0) { 00692 *(z_data+1) = *z_data; // copy bit from block 0 to block 1 00693 } 00694 *z_data = *(z_data-1); // copy the content of the sumblock to block 0 00695 uint64_t sumblockword=0; 00696 // calculate sumblock 00697 for (int i=7; i>=0; --i) { 00698 sumblockword<<=8; 00699 sumblockword+=sumblock[i]; 00700 sumblock[i]=0; 00701 overflow[i]=false; 00702 } 00703 *(z_data-1) = sumblockword; 00704 ++z_data; 00705 z_size_new += 64;// add 64 because we have added the sumblock 00706 } 00707 z_size = z_size_new; 00708 blocknr = (z_size/64)%9; 00709 writeblock = ((z_size-1)/64)%9; 00710 00711 // sumblock[blocknr] contains the sum of fibonacci encoded words that ends in this block 00712 // or 0 if this sum is greater than 255. 00713 write_sumblock(x, writeblock, sbp, ovp); 00714 // if(!overflow[blocknr]){ 00715 // if( x > 255 ){ 00716 // overflow[blocknr] = true; sumblock[blocknr] = 0; 00717 // }else{ 00718 // sumblock[blocknr] += x; 00719 // if(sumblock[blocknr]>255){ overflow[blocknr] = true; sumblock[blocknr] = 0; } 00720 // } 00721 // } 00722 } 00723 v1=v2; 00724 } 00725 if (blocknr==0 and (z_size%64)==0) { 00726 //no additional bits to add 00727 } else { // write last sumblock 00728 assert(blocknr!=8); 00729 uint64_t sumblockword=0; 00730 // calculate sumblock 00731 for (int i=7; i>=0; --i) { 00732 sumblockword<<=8; 00733 sumblockword+=sumblock[i]; 00734 sumblock[i]=0; 00735 overflow[i]=false; 00736 } 00737 z_data += (8-blocknr); 00738 *z_data = sumblockword; 00739 } 00740 } 00741 // delta_c.resize(0); 00742 //std::cerr<<"Calc rank"<<std::endl; 00743 //std::cerr<<"Calc select"<<std::endl; 00744 //std::cerr<<"Finished "<<std::endl;, 00745 m_elements = c.size(); 00746 } 00747 00748 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00749 enc_vector_dna<>::size_type enc_vector_dna<SampleDens,fixedIntWidth>::serialize(std::ostream& out) const 00750 { 00751 size_type written_bytes = 0; 00752 out.write((char*) &m_elements, sizeof(m_elements)); 00753 written_bytes += sizeof(m_elements); 00754 written_bytes += m_z.serialize(out); 00755 written_bytes += m_sample_vals_and_pointer.serialize(out); 00756 return written_bytes; 00757 } 00758 00759 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00760 void enc_vector_dna<SampleDens,fixedIntWidth>::load(std::istream& in) 00761 { 00762 in.read((char*) &m_elements, sizeof(m_elements)); 00763 m_z.load(in); 00764 m_sample_vals_and_pointer.load(in); 00765 } 00766 00767 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00768 const typename enc_vector_dna<SampleDens,fixedIntWidth>::const_iterator enc_vector_dna<SampleDens,fixedIntWidth>::begin()const 00769 { 00770 return const_iterator(this, 0); 00771 } 00772 00773 template<uint32_t SampleDens, uint8_t fixedIntWidth> 00774 const typename enc_vector_dna<SampleDens,fixedIntWidth>::const_iterator enc_vector_dna<SampleDens,fixedIntWidth>::end()const 00775 { 00776 return const_iterator(this, this->m_elements); 00777 } 00778 00779 00780 } // end namespace sdsl 00781 00782 #endif