SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_CSA_SADA 00022 #define INCLUDED_SDSL_CSA_SADA 00023 00024 #include "enc_vector.hpp" 00025 #include "int_vector.hpp" 00026 #include "algorithms.hpp" 00027 #include "iterators.hpp" 00028 #include "suffixarrays.hpp" 00029 #include "suffixarray_helper.hpp" 00030 #include "util.hpp" 00031 #include "testutils.hpp" 00032 #include "bwt_construct.hpp" 00033 #include <iostream> 00034 #include <algorithm> 00035 #include <cassert> 00036 #include <cstring> // for strlen 00037 #include <iomanip> 00038 #include <iterator> 00039 00040 namespace sdsl 00041 { 00042 00043 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00044 class csa_sada; 00045 00046 template<uint8_t fixedIntWidth> 00047 struct csa_sada_trait { 00048 typedef int_vector<0> int_vector_type; 00049 }; 00050 00051 template<> 00052 struct csa_sada_trait<32> { 00053 typedef int_vector<32> int_vector_type; 00054 }; 00055 00056 template<> 00057 struct csa_sada_trait<64> { 00058 typedef int_vector<64> int_vector_type; 00059 }; 00060 00061 00063 00071 template<class EncVector = enc_vector<>, uint32_t SampleDens = 32, uint32_t InvSampleDens = 64, uint8_t fixedIntWidth = 0> 00072 class csa_sada 00073 { 00074 public: 00075 typedef uint64_t value_type; // STL Container requirement 00076 typedef random_access_const_iterator<csa_sada> const_iterator;// STL Container requirement 00077 typedef const_iterator iterator; // STL Container requirement 00078 typedef const value_type const_reference; 00079 typedef const_reference reference; 00080 typedef const_reference* pointer; 00081 typedef const pointer const_pointer; 00082 typedef int_vector<>::size_type size_type; // STL Container requirement 00083 typedef size_type csa_size_type; 00084 typedef ptrdiff_t difference_type; // STL Container requirement 00085 typedef EncVector enc_vector_type; 00086 typedef psi_of_csa_psi<csa_sada> psi_type; 00087 typedef bwt_of_csa_psi<csa_sada> bwt_type; 00088 typedef const unsigned char* pattern_type; 00089 typedef unsigned char char_type; 00090 typedef typename csa_sada_trait<fixedIntWidth>::int_vector_type sa_sample_type; 00091 typedef typename csa_sada_trait<fixedIntWidth>::int_vector_type isa_sample_type; 00092 00093 typedef csa_tag index_category; 00094 00095 enum { sa_sample_dens = SampleDens, 00096 isa_sample_dens = InvSampleDens 00097 }; 00098 00099 friend class psi_of_csa_psi<csa_sada>; 00100 friend class bwt_of_csa_psi<csa_sada>; 00101 00102 static const int linear_decode_limit = 100000; 00103 00104 // static const uint32_t sample_dens = SampleDens; 00105 private: 00106 enc_vector_type m_psi; // psi function 00107 psi_type m_psi_wrapper; 00108 bwt_type m_bwt; 00109 sa_sample_type m_sa_sample; // suffix array samples 00110 isa_sample_type m_isa_sample; // inverse suffix array samples 00111 int_vector<8> m_char2comp; // =0 for the 0-byte and all characters which do not occur in the text 00112 int_vector<8> m_comp2char; 00113 int_vector<64> m_C; 00114 uint16_t m_sigma; 00115 00116 uint64_t *m_psi_buf; //[SampleDens+1]; // buffer for decoded psi values 00117 00118 template<typename RandomAccessContainer> 00119 void construct(const RandomAccessContainer& sa, const unsigned char* str); 00120 00121 template<typename RandomAccessContainer> 00122 void construct(RandomAccessContainer& sa, const unsigned char* str); 00123 00124 template<typename RandomAccessContainer> 00125 void construct_samples(const RandomAccessContainer& sa, const unsigned char* str); 00126 00127 void copy(const csa_sada& csa) { 00128 m_psi = csa.m_psi; 00129 m_sa_sample = csa.m_sa_sample; 00130 m_isa_sample = csa.m_isa_sample; 00131 m_char2comp = csa.m_char2comp; 00132 m_comp2char = csa.m_comp2char; 00133 m_C = csa.m_C; 00134 m_sigma = csa.m_sigma; 00135 m_psi_wrapper = psi_type(this); 00136 m_bwt = bwt_type(this); 00137 }; 00138 00139 void create_buffer(){ 00140 if ( enc_vector_type::sample_dens < linear_decode_limit ){ 00141 m_psi_buf = new uint64_t[enc_vector_type::sample_dens+1]; 00142 }else{ 00143 m_psi_buf = NULL; 00144 } 00145 } 00146 00147 void delete_buffer(){ 00148 if ( m_psi_buf != NULL ){ 00149 delete [] m_psi_buf; 00150 } 00151 } 00152 00153 public: 00154 const int_vector<8>& char2comp; 00155 const int_vector<8>& comp2char; 00156 const int_vector<64>& C; 00157 const uint16_t& sigma; 00158 const psi_type& psi; 00159 const bwt_type& bwt; 00160 const sa_sample_type& sa_sample; 00161 const isa_sample_type& isa_sample; 00162 00163 00165 csa_sada():char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) { 00166 util::assign(m_char2comp, int_vector<8>(256, 0)); 00167 util::assign(m_comp2char, int_vector<8>(256, 0)); 00168 util::assign(m_C, int_vector<64>(257, 0)); 00169 m_sigma = 0; 00170 create_buffer(); 00171 } 00173 ~csa_sada() { 00174 delete_buffer(); 00175 } 00176 00178 csa_sada(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) { 00179 create_buffer(); 00180 copy(csa); 00181 } 00182 00184 00187 template<typename RandomAccessContainer> 00188 csa_sada(const RandomAccessContainer& sa, const unsigned char* str); 00189 00191 00194 template<typename RandomAccessContainer> 00195 csa_sada(RandomAccessContainer& sa, const unsigned char* str); 00196 00198 template<class size_type_class, uint8_t int_width, class size_type_class_1> 00199 csa_sada(int_vector_file_buffer<8, size_type_class>& bwt_buf, 00200 int_vector_file_buffer<int_width, size_type_class_1>& sa_buf); 00201 00202 csa_sada(tMSS& file_map, const std::string& dir, const std::string& id); 00203 00204 void construct(tMSS& file_map, const std::string& dir, const std::string& id); 00205 00207 00209 csa_sada(const unsigned char* str); 00210 00212 00217 size_type size()const { 00218 return m_psi.size(); 00219 } 00220 00222 00225 static size_type max_size() { 00226 return EncVector::max_size(); 00227 } 00228 00230 00233 bool empty()const { 00234 return m_psi.empty(); 00235 } 00236 00238 00246 void swap(csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa); 00247 00249 00252 const_iterator begin()const; 00253 00255 00258 const_iterator end()const; 00259 00261 00267 inline value_type operator[](size_type i)const; 00268 00270 00275 inline value_type operator()(size_type i)const; 00276 00278 00281 csa_sada& operator=(const csa_sada& csa); 00282 00284 00289 bool operator==(const csa_sada& csa)const; 00290 00292 00297 bool operator!=(const csa_sada& csa)const; 00298 00300 00303 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const; 00304 00306 00308 void load(std::istream& in); 00309 00310 uint32_t get_sample_dens() const; 00311 00312 uint32_t get_psi_sample_dens() const; 00313 void set_psi_sample_dens(const uint32_t sample_dens); 00314 00316 00323 size_type rank_bwt(size_type i, const unsigned char c)const { 00324 unsigned char cc = m_char2comp[c]; 00325 if (cc==0 and c!=0) // character is not in the text => return 0 00326 return 0; 00327 if (i == 0) 00328 return 0; 00329 assert(i <= size()); 00330 00331 size_type lower_b, upper_b; // lower_b inclusive, upper_b exclusive 00332 00333 const size_type sd = m_psi.get_sample_dens(); 00334 size_type lower_sb = (m_C[cc]+sd-1)/sd; // lower_sb inclusive 00335 size_type upper_sb = (m_C[cc+1]+sd-1)/sd; // upper_sb exclusive 00336 while (lower_sb+1 < upper_sb) { 00337 size_type mid = (lower_sb+upper_sb)/2; 00338 if (m_psi.sample(mid) >= i) 00339 upper_sb = mid; 00340 else 00341 lower_sb = mid; 00342 } 00343 00344 if (lower_sb == upper_sb) { // the interval was smaller than sd 00345 lower_b = m_C[cc]; upper_b = m_C[cc+1]; 00346 } else if (lower_sb > (m_C[cc]+sd-1)/sd) { // main case 00347 // TODO: don't use get_inter_sampled_values if SampleDens is really 00348 // large 00349 lower_b = lower_sb*sd; 00350 if ( m_psi_buf == NULL ){ 00351 upper_b = std::min(upper_sb*sd, m_C[cc+1]); 00352 goto finish; 00353 } 00354 uint64_t* p=m_psi_buf; 00355 // extract the psi values between two samples 00356 m_psi.get_inter_sampled_values(lower_sb, p); 00357 p = m_psi_buf; 00358 uint64_t smpl = m_psi.sample(lower_sb); 00359 // handle border cases 00360 if (lower_b + m_psi.get_sample_dens() >= m_C[cc+1]) 00361 m_psi_buf[ m_C[cc+1]-lower_b ] = size()-smpl; 00362 else 00363 m_psi_buf[ m_psi.get_sample_dens() ] = size()-smpl; 00364 // search the result linear 00365 while ((*p++)+smpl < i); 00366 00367 return p-1-m_psi_buf + lower_b - m_C[cc]; 00368 } else { // lower_b == (m_C[cc]+sd-1)/sd and lower_sb < upper_sb 00369 if (m_psi.sample(lower_sb) >= i) { 00370 lower_b = m_C[cc]; 00371 upper_b = lower_sb * sd + 1; 00372 } else { 00373 lower_b = lower_sb * sd; 00374 upper_b = std::min(upper_sb*sd, m_C[cc+1]); 00375 } 00376 } 00377 finish: 00378 // binary search the interval [C[cc]..C[cc+1]-1] for the result 00379 // size_type lower_b = m_C[cc], upper_b = m_C[cc+1]; // lower_b inclusive, upper_b exclusive 00380 while (lower_b+1 < upper_b) { 00381 size_type mid = (lower_b+upper_b)/2; 00382 if (m_psi[mid] >= i) 00383 upper_b = mid; 00384 else 00385 lower_b = mid; 00386 } 00387 if (lower_b > m_C[cc]) 00388 return lower_b - m_C[cc] + 1; 00389 else { // lower_b == m_C[cc] 00390 return m_psi[lower_b] < i;// 1 if m_psi[lower_b]<i, 0 otherwise 00391 } 00392 } 00393 00395 00402 size_type select_bwt(size_type i, const unsigned char c)const { 00403 assert(i > 0); 00404 unsigned char cc = m_char2comp[c]; 00405 if (cc==0 and c!=0) // character is not in the text => return 0 00406 return size(); 00407 assert(cc != 255); 00408 if (m_C[cc]+i-1 < m_C[cc+1]) { 00409 return m_psi[m_C[cc]+i-1]; 00410 } else 00411 return size(); 00412 } 00413 }; 00414 00415 // == template functions == 00416 00417 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00418 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) 00419 { 00420 create_buffer(); 00421 csa_uncompressed sa(str); 00422 // size_type n = strlen((const char*)str); 00423 // int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1); 00424 // algorithm::calculate_sa(str, n+1, sa); // calculate the suffix array sa of str 00425 // assert(sa.size() == n+1); 00426 algorithm::set_text<csa_sada>(str, sa.size(), m_C, m_char2comp, m_comp2char, m_sigma); 00427 construct(sa, str); 00428 // if( n+1 > 0 and n+1 != size() ) 00429 // throw std::logic_error("csa_sada: text size differ with sa size!"); 00430 } 00431 00432 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00433 template<typename RandomAccessContainer> 00434 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(const RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) 00435 { 00436 create_buffer(); 00437 size_type n = 1; 00438 if (str != NULL) { 00439 n = strlen((const char*)str); 00440 } 00441 algorithm::set_text<csa_sada>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma); 00442 assert(sa.size() == n+1); 00443 construct(sa, str); 00444 if (n+1 > 0 and n+1 != size()) 00445 throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size!"); 00446 } 00447 00448 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00449 template<typename RandomAccessContainer> 00450 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) 00451 { 00452 create_buffer(); 00453 size_type n = 1; 00454 if (str != NULL) { 00455 n = strlen((const char*)str); 00456 } 00457 assert(sa.size() == n+1); 00458 algorithm::set_text<csa_sada>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma); 00459 construct(sa, str); 00460 if (n+1 > 0 and n+1 != size()) 00461 throw std::logic_error("csa_sada: text size differ with sa size!"); 00462 } 00463 00464 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00465 template<class size_type_class, uint8_t int_width, class size_type_class_1> 00466 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(int_vector_file_buffer<8, size_type_class>& bwt_buf, 00467 int_vector_file_buffer<int_width, size_type_class_1>& sa_buf): 00468 char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) 00469 { 00470 create_buffer(); 00471 bwt_buf.reset(); sa_buf.reset(); 00472 size_type n = bwt_buf.int_vector_size; 00473 algorithm::set_text<csa_sada>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma); 00474 assert(sa_buf.int_vetor_size == sa_buf.int_vector_size); 00475 00476 size_type cnt_chr[256] = {0}; 00477 for (uint32_t i=0; i<m_sigma; ++i) 00478 cnt_chr[m_comp2char[i]] = C[i]; 00479 stop_watch sw; sw.start(); 00480 // write_R_output(sw, "psi function", "construct","begin"); 00481 // calculate psi 00482 { 00483 bwt_buf.reset(); 00484 int_vector<> temp(n, 0, bit_magic::l1BP(n)+1); 00485 for (size_type i=0, r_sum=0, r=bwt_buf.load_next_block(); r_sum < n;) { 00486 for (; i < r_sum+r; ++i) { 00487 temp[ cnt_chr[ bwt_buf[i-r_sum] ]++ ] = i; 00488 } 00489 r_sum += r; r = bwt_buf.load_next_block(); 00490 } 00491 util::store_to_file(temp, "/tmp/deleteme"); 00492 } 00493 // write_R_output(sw, "psi function", "construct","end"); 00494 int_vector_file_buffer<> psi_buf("/tmp/deleteme"); 00495 // write_R_output(sw, "encoded psi", "construct", "begin"); 00496 m_psi = EncVector(psi_buf); 00497 // write_R_output(sw, "encoded psi", "construct", "end"); 00498 std::remove("/tmp/deleteme"); 00499 algorithm::set_sa_and_isa_samples<csa_sada>(sa_buf, m_sa_sample, m_isa_sample); 00500 m_psi_wrapper = psi_type(this); 00501 m_bwt = bwt_type(this); 00502 } 00503 00504 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00505 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(tMSS& file_map, const std::string& dir, const std::string& id): 00506 char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) 00507 { 00508 create_buffer(); 00509 construct(file_map, dir, id); 00510 } 00511 00512 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00513 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(tMSS& file_map, const std::string& dir, const std::string& id) 00514 { 00515 if (file_map.find("bwt") == file_map.end()) { // if bwt is not already stored on disk => construct bwt 00516 construct_bwt(file_map, dir, id); 00517 } 00518 int_vector_file_buffer<8> bwt_buf(file_map["bwt"].c_str()); 00519 size_type n = bwt_buf.int_vector_size; 00520 algorithm::set_text<csa_sada>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma); 00521 00522 size_type cnt_chr[256] = {0}; 00523 for (uint32_t i=0; i<m_sigma; ++i) 00524 cnt_chr[m_comp2char[i]] = C[i]; 00525 stop_watch sw; sw.start(); 00526 write_R_output("csa", "construct PSI","begin",1,0); 00527 // calculate psi 00528 { 00529 bwt_buf.reset(); 00530 int_vector<> psi(n, 0, bit_magic::l1BP(n)+1); 00531 for (size_type i=0, r_sum=0, r=bwt_buf.load_next_block(); r_sum < n;) { 00532 for (; i < r_sum+r; ++i) { 00533 psi[ cnt_chr[ bwt_buf[i-r_sum] ]++ ] = i; 00534 } 00535 r_sum += r; r = bwt_buf.load_next_block(); 00536 } 00537 if (!util::store_to_file(psi, (dir+"psi_"+id).c_str())) { 00538 throw std::ios_base::failure("#csa_sada: Cannot store PSI to file system!"); 00539 } else { 00540 file_map["psi"] = dir+"psi_"+id; 00541 } 00542 } 00543 write_R_output("csa", "construct PSI","end",1,0); 00544 int_vector_file_buffer<> psi_buf(file_map["psi"].c_str()); 00545 // write_R_output(sw, "encoded psi", "construct", "begin"); 00546 m_psi = EncVector(psi_buf); 00547 // write_R_output(sw, "encoded psi", "construct", "end"); 00548 int_vector_file_buffer<> sa_buf(file_map["sa"].c_str()); 00549 algorithm::set_sa_and_isa_samples<csa_sada>(sa_buf, m_sa_sample, m_isa_sample); 00550 m_psi_wrapper = psi_type(this); 00551 m_bwt = bwt_type(this); 00552 } 00553 00554 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00555 uint32_t csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::get_sample_dens()const 00556 { 00557 return SampleDens; 00558 } 00559 00560 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00561 uint32_t csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::get_psi_sample_dens()const 00562 { 00563 return m_psi.get_sample_dens(); 00564 } 00565 00566 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00567 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::set_psi_sample_dens(const uint32_t sample_dens) 00568 { 00569 m_psi.get_sample_dens(); 00570 } 00571 00572 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00573 template<typename RandomAccessContainer> 00574 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct_samples(const RandomAccessContainer& sa, const unsigned char* str) 00575 { 00576 m_sa_sample.set_int_width(bit_magic::l1BP(sa.size())+1); 00577 m_sa_sample.resize((sa.size()+get_sample_dens()-1)/get_sample_dens()); 00578 typename RandomAccessContainer::size_type i=0, idx=0; 00579 for (typename RandomAccessContainer::const_iterator it = sa.begin(); i < sa.size(); it += (difference_type)get_sample_dens(), i += get_sample_dens(), ++idx) { 00580 m_sa_sample[idx] = *it; 00581 } 00582 // const uint32_t SampleDens, uint32_t InvSampleDensISA = get_sample_dens()*16; 00583 m_isa_sample.set_int_width(bit_magic::l1BP(sa.size())+1); 00584 m_isa_sample.resize((sa.size()+(InvSampleDens)-1)/(InvSampleDens)); 00585 i = 0; 00586 for (typename RandomAccessContainer::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i) { 00587 if ((*it % InvSampleDens) == 0) { 00588 m_isa_sample[*it/InvSampleDens] = i; 00589 } 00590 } 00591 } 00592 00593 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00594 template<typename RandomAccessContainer> 00595 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(const RandomAccessContainer& sa, const unsigned char* str) 00596 { 00597 construct_samples(sa, str); 00598 #ifdef SDSL_DEBUG 00599 std::cerr<<"create encoded psi"<<std::endl; 00600 #endif 00601 if (sa.psi.size() > 0) { 00602 m_psi = EncVector(sa.psi); 00603 } else { 00604 m_psi = EncVector(); 00605 } 00606 m_psi_wrapper = psi_type(this); 00607 m_bwt = bwt_type(this); 00608 #ifdef SDSL_DEBUG 00609 std::cerr<<"encoded psi created"<<std::endl; 00610 #endif 00611 } 00612 00613 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00614 template<typename RandomAccessContainer> 00615 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(RandomAccessContainer& sa, const unsigned char* str) 00616 { 00617 construct_samples(sa, str); 00618 #ifdef SDSL_DEBUG 00619 std::cerr<<"create encoded psi"<<std::endl; 00620 #endif 00621 if (sa.psi.size() > 0) { 00622 m_psi = EncVector(sa.psi); 00623 } else { 00624 m_psi = EncVector(); 00625 } 00626 m_psi_wrapper = psi_type(this); 00627 m_bwt = bwt_type(this); 00628 { 00629 // clear sa 00630 RandomAccessContainer tmp; 00631 tmp.swap(sa); 00632 } 00633 #ifdef SDSL_DEBUG 00634 std::cerr<<"encoded psi and created; sa destroyed"<<std::endl; 00635 #endif 00636 } 00637 00638 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00639 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::const_iterator csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::begin()const 00640 { 00641 return const_iterator(this, 0); 00642 } 00643 00644 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00645 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::const_iterator csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::end()const 00646 { 00647 return const_iterator(this, size()); 00648 } 00649 00650 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00651 inline typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::value_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator[](size_type i)const 00652 { 00653 size_type off = 0; 00654 while (i % SampleDens) {// while i mod SampleDens != 0 (SA[i] is not sampled) SG: auf keinen Fall get_sample_dens nehmen, ist total langsam 00655 i = m_psi[i]; // go to the position where SA[i]+1 is located 00656 ++off; // add 1 to the offset 00657 } 00658 value_type result = m_sa_sample[i/SampleDens]; 00659 if (result < off) { 00660 return m_psi.size()-(off-result); 00661 } else 00662 return result-off; 00663 } 00664 00665 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00666 inline typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::value_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator()(size_type i)const 00667 { 00668 value_type result = m_isa_sample[i/InvSampleDens]; // get the rightmost sampled isa value 00669 i = i % InvSampleDens; 00670 while (i--) { 00671 result = m_psi[result]; 00672 } 00673 // assert(((*this)[result])==j); 00674 return result; 00675 } 00676 00677 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00678 csa_sada<EncVector,SampleDens, InvSampleDens, fixedIntWidth>& csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator=(const csa_sada<EncVector,SampleDens, InvSampleDens, fixedIntWidth>& csa) 00679 { 00680 if (this != &csa) { 00681 copy(csa); 00682 } 00683 return *this; 00684 } 00685 00686 00687 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00688 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::size_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const 00689 { 00690 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00691 size_type written_bytes = 0; 00692 written_bytes += m_psi.serialize(out, child, "psi"); 00693 written_bytes += m_sa_sample.serialize(out, child, "sa_samples"); 00694 written_bytes += m_isa_sample.serialize(out, child, "isa_samples"); 00695 written_bytes += m_char2comp.serialize(out, child, "char2comp"); 00696 written_bytes += m_comp2char.serialize(out, child, "comp2char"); 00697 written_bytes += m_C.serialize(out, child, "C"); 00698 written_bytes += util::write_member(m_sigma, out, child, "sigma"); 00699 structure_tree::add_size(child, written_bytes); 00700 return written_bytes; 00701 } 00702 00703 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00704 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::load(std::istream& in) 00705 { 00706 m_psi.load(in); 00707 m_sa_sample.load(in); 00708 m_isa_sample.load(in); 00709 m_char2comp.load(in); 00710 m_comp2char.load(in); 00711 m_C.load(in); 00712 util::read_member(m_sigma, in); 00713 m_psi_wrapper = psi_type(this); 00714 m_bwt = bwt_type(this); 00715 } 00716 00717 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00718 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::swap(csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa) 00719 { 00720 if (this != &csa) { 00721 m_psi.swap(csa.m_psi); 00722 m_sa_sample.swap(csa.m_sa_sample); 00723 m_isa_sample.swap(csa.m_isa_sample); 00724 m_char2comp.swap(csa.m_char2comp); 00725 m_comp2char.swap(csa.m_comp2char); 00726 m_C.swap(csa.m_C); 00727 std::swap(m_sigma, csa.m_sigma); 00728 m_psi_wrapper = psi_type(this); 00729 csa.m_psi_wrapper = psi_type(&csa); 00730 m_bwt = bwt_type(this); 00731 csa.m_bwt = bwt_type(&csa); 00732 } 00733 } 00734 00735 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00736 bool csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator==(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa)const 00737 { 00738 for (uint16_t i=0; i<256; ++i) 00739 if (m_char2comp[i] != csa.m_char2comp[i] or m_comp2char[i] != csa.m_comp2char[i] or m_C[i] != csa.m_C[i]) 00740 return false; 00741 return m_psi == csa.m_psi and m_sa_sample == csa.m_sa_sample and m_isa_sample == csa.m_isa_sample and m_C[256] == csa.m_C[256] and m_sigma == csa.m_sigma; 00742 } 00743 00744 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth> 00745 bool csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator!=(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa)const 00746 { 00747 return !(*this == csa); 00748 // return m_psi != csa.m_psi or m_sa_sample != csa.m_sa_sample or m_isa_sample != csa.m_isa_sample; 00749 } 00750 00751 } // end namespace sdsl 00752 00753 #endif