SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/csa_sada.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2008 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CSA_SADA
00022 #define INCLUDED_SDSL_CSA_SADA
00023 
00024 #include "enc_vector.hpp"
00025 #include "int_vector.hpp"
00026 #include "algorithms.hpp"
00027 #include "iterators.hpp"
00028 #include "suffixarrays.hpp"
00029 #include "suffixarray_helper.hpp"
00030 #include "util.hpp"
00031 #include "testutils.hpp"
00032 #include "bwt_construct.hpp"
00033 #include <iostream>
00034 #include <algorithm>
00035 #include <cassert>
00036 #include <cstring> // for strlen
00037 #include <iomanip>
00038 #include <iterator>
00039 
00040 namespace sdsl
00041 {
00042 
00043 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens,  uint8_t fixedIntWidth>
00044 class csa_sada;
00045 
00046 template<uint8_t fixedIntWidth>
00047 struct csa_sada_trait {
00048     typedef int_vector<0> int_vector_type;
00049 };
00050 
00051 template<>
00052 struct csa_sada_trait<32> {
00053     typedef int_vector<32> int_vector_type;
00054 };
00055 
00056 template<>
00057 struct csa_sada_trait<64> {
00058     typedef int_vector<64> int_vector_type;
00059 };
00060 
00061 
00063 
00071 template<class EncVector = enc_vector<>, uint32_t SampleDens = 32, uint32_t InvSampleDens = 64,  uint8_t fixedIntWidth = 0>
00072 class csa_sada
00073 {
00074     public:
00075         typedef uint64_t                                                                                              value_type;       // STL Container requirement
00076         typedef random_access_const_iterator<csa_sada>                                const_iterator;// STL Container requirement
00077         typedef const_iterator                                                                                iterator;         // STL Container requirement
00078         typedef const value_type                                                                              const_reference;
00079         typedef const_reference                                                                               reference;
00080         typedef const_reference*                                                                              pointer;
00081         typedef const pointer                                                                                 const_pointer;
00082         typedef int_vector<>::size_type                                                               size_type;                // STL Container requirement
00083         typedef size_type                                                                                             csa_size_type;
00084         typedef ptrdiff_t                                                                                             difference_type; // STL Container requirement
00085         typedef EncVector                                                                                             enc_vector_type;
00086         typedef psi_of_csa_psi<csa_sada>                                                              psi_type;
00087         typedef bwt_of_csa_psi<csa_sada>                                                              bwt_type;
00088         typedef const unsigned char*                                                                  pattern_type;
00089         typedef unsigned char                                                                                 char_type;
00090         typedef typename csa_sada_trait<fixedIntWidth>::int_vector_type   sa_sample_type;
00091         typedef typename csa_sada_trait<fixedIntWidth>::int_vector_type   isa_sample_type;
00092 
00093         typedef csa_tag                                                                                                   index_category;
00094 
00095         enum { sa_sample_dens = SampleDens,
00096                isa_sample_dens = InvSampleDens
00097              };
00098 
00099         friend class psi_of_csa_psi<csa_sada>;
00100         friend class bwt_of_csa_psi<csa_sada>;
00101 
00102                 static const int linear_decode_limit = 100000;
00103 
00104 //      static const uint32_t sample_dens = SampleDens;
00105     private:
00106         enc_vector_type m_psi;  // psi function
00107         psi_type m_psi_wrapper;
00108         bwt_type m_bwt;
00109         sa_sample_type  m_sa_sample; // suffix array samples
00110         isa_sample_type m_isa_sample; // inverse suffix array samples
00111         int_vector<8>   m_char2comp; // =0 for the 0-byte and all characters which do not occur in the text
00112         int_vector<8>   m_comp2char;
00113         int_vector<64>  m_C;
00114         uint16_t                m_sigma;
00115 
00116         uint64_t *m_psi_buf; //[SampleDens+1]; // buffer for decoded psi values
00117 
00118         template<typename RandomAccessContainer>
00119         void construct(const RandomAccessContainer& sa, const unsigned char* str);
00120 
00121         template<typename RandomAccessContainer>
00122         void construct(RandomAccessContainer& sa, const unsigned char* str);
00123 
00124         template<typename RandomAccessContainer>
00125         void construct_samples(const RandomAccessContainer& sa, const unsigned char* str);
00126 
00127         void copy(const csa_sada& csa) {
00128             m_psi = csa.m_psi;
00129             m_sa_sample = csa.m_sa_sample;
00130             m_isa_sample = csa.m_isa_sample;
00131             m_char2comp  = csa.m_char2comp;
00132             m_comp2char  = csa.m_comp2char;
00133             m_C = csa.m_C;
00134             m_sigma              = csa.m_sigma;
00135             m_psi_wrapper = psi_type(this);
00136             m_bwt = bwt_type(this);
00137         };
00138 
00139                 void create_buffer(){
00140                         if ( enc_vector_type::sample_dens < linear_decode_limit  ){
00141                                 m_psi_buf = new uint64_t[enc_vector_type::sample_dens+1];
00142                         }else{
00143                                 m_psi_buf = NULL;
00144                         }
00145                 }
00146 
00147                 void delete_buffer(){
00148                         if ( m_psi_buf != NULL ){
00149                                 delete [] m_psi_buf;
00150                         }
00151                 }
00152 
00153     public:
00154         const int_vector<8>& char2comp;
00155         const int_vector<8>& comp2char;
00156         const int_vector<64>& C;
00157         const uint16_t& sigma;
00158         const psi_type& psi;
00159         const bwt_type& bwt;
00160         const sa_sample_type& sa_sample;
00161         const isa_sample_type& isa_sample;
00162 
00163 
00165         csa_sada():char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) {
00166             util::assign(m_char2comp, int_vector<8>(256, 0));
00167             util::assign(m_comp2char, int_vector<8>(256, 0));
00168             util::assign(m_C, int_vector<64>(257, 0));
00169             m_sigma = 0;
00170                         create_buffer();
00171         }
00173         ~csa_sada() {
00174                         delete_buffer();
00175                 }
00176 
00178         csa_sada(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample) {
00179                         create_buffer();
00180             copy(csa);
00181         }
00182 
00184 
00187         template<typename RandomAccessContainer>
00188         csa_sada(const RandomAccessContainer& sa, const unsigned char* str);
00189 
00191 
00194         template<typename RandomAccessContainer>
00195         csa_sada(RandomAccessContainer& sa, const unsigned char* str);
00196 
00198         template<class size_type_class, uint8_t int_width, class size_type_class_1>
00199         csa_sada(int_vector_file_buffer<8, size_type_class>& bwt_buf,
00200                  int_vector_file_buffer<int_width, size_type_class_1>& sa_buf);
00201 
00202         csa_sada(tMSS& file_map, const std::string& dir, const std::string& id);
00203 
00204         void construct(tMSS& file_map, const std::string& dir, const std::string& id);
00205 
00207 
00209         csa_sada(const unsigned char* str);
00210 
00212 
00217         size_type size()const {
00218             return m_psi.size();
00219         }
00220 
00222 
00225         static size_type max_size() {
00226             return EncVector::max_size();
00227         }
00228 
00230 
00233         bool empty()const {
00234             return m_psi.empty();
00235         }
00236 
00238 
00246         void swap(csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa);
00247 
00249 
00252         const_iterator begin()const;
00253 
00255 
00258         const_iterator end()const;
00259 
00261 
00267         inline value_type operator[](size_type i)const;
00268 
00270 
00275         inline value_type operator()(size_type i)const;
00276 
00278 
00281         csa_sada& operator=(const csa_sada& csa);
00282 
00284 
00289         bool operator==(const csa_sada& csa)const;
00290 
00292 
00297         bool operator!=(const csa_sada& csa)const;
00298 
00300 
00303         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00304 
00306 
00308         void load(std::istream& in);
00309 
00310         uint32_t get_sample_dens() const;
00311 
00312         uint32_t get_psi_sample_dens() const;
00313         void set_psi_sample_dens(const uint32_t sample_dens);
00314 
00316 
00323         size_type rank_bwt(size_type i, const unsigned char c)const {
00324             unsigned char cc = m_char2comp[c];
00325             if (cc==0 and c!=0)  // character is not in the text => return 0
00326                 return 0;
00327             if (i == 0)
00328                 return 0;
00329             assert(i <= size());
00330 
00331             size_type lower_b, upper_b; // lower_b inclusive, upper_b exclusive
00332 
00333             const size_type sd = m_psi.get_sample_dens();
00334             size_type lower_sb = (m_C[cc]+sd-1)/sd; // lower_sb inclusive
00335             size_type upper_sb = (m_C[cc+1]+sd-1)/sd; // upper_sb exclusive
00336             while (lower_sb+1 < upper_sb) {
00337                 size_type mid = (lower_sb+upper_sb)/2;
00338                 if (m_psi.sample(mid) >= i)
00339                     upper_sb = mid;
00340                 else
00341                     lower_sb = mid;
00342             }
00343 
00344             if (lower_sb == upper_sb) { // the interval was smaller than sd
00345                 lower_b = m_C[cc]; upper_b = m_C[cc+1];
00346             } else if (lower_sb > (m_C[cc]+sd-1)/sd) { // main case
00347 // TODO: don't use get_inter_sampled_values if SampleDens is really
00348 //       large                          
00349                 lower_b = lower_sb*sd;
00350                                 if ( m_psi_buf == NULL ){
00351                                         upper_b = std::min(upper_sb*sd, m_C[cc+1]);
00352                                         goto finish;
00353                                 }
00354                 uint64_t* p=m_psi_buf;
00355                 // extract the psi values between two samples
00356                 m_psi.get_inter_sampled_values(lower_sb, p);
00357                 p = m_psi_buf;
00358                 uint64_t smpl = m_psi.sample(lower_sb);
00359                 // handle border cases
00360                 if (lower_b + m_psi.get_sample_dens() >= m_C[cc+1])
00361                     m_psi_buf[ m_C[cc+1]-lower_b ] = size()-smpl;
00362                 else
00363                     m_psi_buf[ m_psi.get_sample_dens() ] = size()-smpl;
00364                 // search the result linear
00365                 while ((*p++)+smpl < i);
00366 
00367                 return p-1-m_psi_buf + lower_b - m_C[cc];
00368             } else { // lower_b == (m_C[cc]+sd-1)/sd and lower_sb < upper_sb
00369                 if (m_psi.sample(lower_sb) >= i) {
00370                     lower_b = m_C[cc];
00371                     upper_b = lower_sb * sd + 1;
00372                 } else {
00373                     lower_b = lower_sb * sd;
00374                     upper_b = std::min(upper_sb*sd, m_C[cc+1]);
00375                 }
00376             }
00377 finish:
00378             // binary search the interval [C[cc]..C[cc+1]-1] for the result
00379 //                      size_type lower_b = m_C[cc], upper_b = m_C[cc+1]; // lower_b inclusive, upper_b exclusive
00380             while (lower_b+1 < upper_b) {
00381                 size_type mid = (lower_b+upper_b)/2;
00382                 if (m_psi[mid] >= i)
00383                     upper_b = mid;
00384                 else
00385                     lower_b = mid;
00386             }
00387             if (lower_b > m_C[cc])
00388                 return lower_b - m_C[cc] + 1;
00389             else { // lower_b == m_C[cc]
00390                 return m_psi[lower_b] < i;// 1 if m_psi[lower_b]<i, 0 otherwise
00391             }
00392         }
00393 
00395 
00402         size_type select_bwt(size_type i, const unsigned char c)const {
00403             assert(i > 0);
00404             unsigned char cc = m_char2comp[c];
00405             if (cc==0 and c!=0)  // character is not in the text => return 0
00406                 return size();
00407             assert(cc != 255);
00408             if (m_C[cc]+i-1 <  m_C[cc+1]) {
00409                 return m_psi[m_C[cc]+i-1];
00410             } else
00411                 return size();
00412         }
00413 };
00414 
00415 // == template functions ==
00416 
00417 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00418 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample)
00419 {
00420         create_buffer();
00421     csa_uncompressed sa(str);
00422 //      size_type n = strlen((const char*)str);
00423 //      int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
00424 //      algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
00425 //      assert(sa.size() == n+1);
00426     algorithm::set_text<csa_sada>(str, sa.size(), m_C, m_char2comp, m_comp2char, m_sigma);
00427     construct(sa, str);
00428 //      if( n+1 > 0 and n+1 != size() )
00429 //              throw std::logic_error("csa_sada: text size differ with sa size!");
00430 }
00431 
00432 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00433 template<typename RandomAccessContainer>
00434 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(const RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample)
00435 {
00436         create_buffer();
00437     size_type n = 1;
00438     if (str != NULL) {
00439         n = strlen((const char*)str);
00440     }
00441     algorithm::set_text<csa_sada>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma);
00442     assert(sa.size() == n+1);
00443     construct(sa, str);
00444     if (n+1 > 0 and n+1 != size())
00445         throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size!");
00446 }
00447 
00448 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00449 template<typename RandomAccessContainer>
00450 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample)
00451 {
00452         create_buffer();
00453     size_type n = 1;
00454     if (str != NULL) {
00455         n = strlen((const char*)str);
00456     }
00457     assert(sa.size() == n+1);
00458     algorithm::set_text<csa_sada>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma);
00459     construct(sa, str);
00460     if (n+1 > 0 and n+1 != size())
00461         throw std::logic_error("csa_sada: text size differ with sa size!");
00462 }
00463 
00464 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00465 template<class size_type_class, uint8_t int_width, class size_type_class_1>
00466 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(int_vector_file_buffer<8, size_type_class>& bwt_buf,
00467         int_vector_file_buffer<int_width, size_type_class_1>& sa_buf):
00468     char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample)
00469 {
00470         create_buffer();
00471     bwt_buf.reset(); sa_buf.reset();
00472     size_type n = bwt_buf.int_vector_size;
00473     algorithm::set_text<csa_sada>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00474     assert(sa_buf.int_vetor_size == sa_buf.int_vector_size);
00475 
00476     size_type cnt_chr[256] = {0};
00477     for (uint32_t i=0; i<m_sigma; ++i)
00478         cnt_chr[m_comp2char[i]] = C[i];
00479     stop_watch sw; sw.start();
00480 //      write_R_output(sw, "psi function", "construct","begin");
00481     // calculate psi
00482     {
00483         bwt_buf.reset();
00484         int_vector<> temp(n, 0, bit_magic::l1BP(n)+1);
00485         for (size_type i=0, r_sum=0, r=bwt_buf.load_next_block(); r_sum < n;) {
00486             for (; i < r_sum+r; ++i) {
00487                 temp[ cnt_chr[ bwt_buf[i-r_sum] ]++ ] = i;
00488             }
00489             r_sum += r; r = bwt_buf.load_next_block();
00490         }
00491         util::store_to_file(temp, "/tmp/deleteme");
00492     }
00493 //      write_R_output(sw, "psi function", "construct","end");
00494     int_vector_file_buffer<> psi_buf("/tmp/deleteme");
00495 //      write_R_output(sw, "encoded psi", "construct", "begin");
00496     m_psi = EncVector(psi_buf);
00497 //      write_R_output(sw, "encoded psi", "construct", "end");
00498     std::remove("/tmp/deleteme");
00499     algorithm::set_sa_and_isa_samples<csa_sada>(sa_buf, m_sa_sample, m_isa_sample);
00500     m_psi_wrapper = psi_type(this);
00501     m_bwt = bwt_type(this);
00502 }
00503 
00504 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00505 csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::csa_sada(tMSS& file_map, const std::string& dir, const std::string& id):
00506     char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi_wrapper), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample)
00507 {
00508         create_buffer();
00509     construct(file_map, dir, id);
00510 }
00511 
00512 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00513 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(tMSS& file_map, const std::string& dir, const std::string& id)
00514 {
00515     if (file_map.find("bwt") == file_map.end()) { // if bwt is not already stored on disk => construct bwt
00516         construct_bwt(file_map, dir, id);
00517     }
00518     int_vector_file_buffer<8> bwt_buf(file_map["bwt"].c_str());
00519     size_type n = bwt_buf.int_vector_size;
00520     algorithm::set_text<csa_sada>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00521 
00522     size_type cnt_chr[256] = {0};
00523     for (uint32_t i=0; i<m_sigma; ++i)
00524         cnt_chr[m_comp2char[i]] = C[i];
00525     stop_watch sw; sw.start();
00526     write_R_output("csa", "construct PSI","begin",1,0);
00527     // calculate psi
00528     {
00529         bwt_buf.reset();
00530         int_vector<> psi(n, 0, bit_magic::l1BP(n)+1);
00531         for (size_type i=0, r_sum=0, r=bwt_buf.load_next_block(); r_sum < n;) {
00532             for (; i < r_sum+r; ++i) {
00533                 psi[ cnt_chr[ bwt_buf[i-r_sum] ]++ ] = i;
00534             }
00535             r_sum += r; r = bwt_buf.load_next_block();
00536         }
00537         if (!util::store_to_file(psi, (dir+"psi_"+id).c_str())) {
00538             throw std::ios_base::failure("#csa_sada: Cannot store PSI to file system!");
00539         } else {
00540             file_map["psi"] = dir+"psi_"+id;
00541         }
00542     }
00543     write_R_output("csa", "construct PSI","end",1,0);
00544     int_vector_file_buffer<> psi_buf(file_map["psi"].c_str());
00545 //      write_R_output(sw, "encoded psi", "construct", "begin");
00546     m_psi = EncVector(psi_buf);
00547 //      write_R_output(sw, "encoded psi", "construct", "end");
00548     int_vector_file_buffer<>  sa_buf(file_map["sa"].c_str());
00549     algorithm::set_sa_and_isa_samples<csa_sada>(sa_buf, m_sa_sample, m_isa_sample);
00550     m_psi_wrapper = psi_type(this);
00551     m_bwt = bwt_type(this);
00552 }
00553 
00554 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00555 uint32_t csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::get_sample_dens()const
00556 {
00557     return SampleDens;
00558 }
00559 
00560 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00561 uint32_t csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::get_psi_sample_dens()const
00562 {
00563     return m_psi.get_sample_dens();
00564 }
00565 
00566 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00567 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::set_psi_sample_dens(const uint32_t sample_dens)
00568 {
00569     m_psi.get_sample_dens();
00570 }
00571 
00572 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00573 template<typename RandomAccessContainer>
00574 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct_samples(const RandomAccessContainer& sa, const unsigned char* str)
00575 {
00576     m_sa_sample.set_int_width(bit_magic::l1BP(sa.size())+1);
00577     m_sa_sample.resize((sa.size()+get_sample_dens()-1)/get_sample_dens());
00578     typename RandomAccessContainer::size_type i=0, idx=0;
00579     for (typename RandomAccessContainer::const_iterator it = sa.begin(); i < sa.size(); it += (difference_type)get_sample_dens(), i += get_sample_dens(), ++idx) {
00580         m_sa_sample[idx] = *it;
00581     }
00582 //      const uint32_t SampleDens, uint32_t InvSampleDensISA = get_sample_dens()*16;
00583     m_isa_sample.set_int_width(bit_magic::l1BP(sa.size())+1);
00584     m_isa_sample.resize((sa.size()+(InvSampleDens)-1)/(InvSampleDens));
00585     i = 0;
00586     for (typename RandomAccessContainer::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i) {
00587         if ((*it % InvSampleDens) == 0) {
00588             m_isa_sample[*it/InvSampleDens] = i;
00589         }
00590     }
00591 }
00592 
00593 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00594 template<typename RandomAccessContainer>
00595 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(const RandomAccessContainer& sa, const unsigned char* str)
00596 {
00597     construct_samples(sa, str);
00598 #ifdef SDSL_DEBUG
00599     std::cerr<<"create encoded psi"<<std::endl;
00600 #endif
00601     if (sa.psi.size() > 0) {
00602         m_psi = EncVector(sa.psi);
00603     } else {
00604         m_psi = EncVector();
00605     }
00606     m_psi_wrapper = psi_type(this);
00607     m_bwt = bwt_type(this);
00608 #ifdef SDSL_DEBUG
00609     std::cerr<<"encoded psi created"<<std::endl;
00610 #endif
00611 }
00612 
00613 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00614 template<typename RandomAccessContainer>
00615 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::construct(RandomAccessContainer& sa, const unsigned char* str)
00616 {
00617     construct_samples(sa, str);
00618 #ifdef SDSL_DEBUG
00619     std::cerr<<"create encoded psi"<<std::endl;
00620 #endif
00621     if (sa.psi.size() > 0) {
00622         m_psi = EncVector(sa.psi);
00623     } else {
00624         m_psi = EncVector();
00625     }
00626     m_psi_wrapper = psi_type(this);
00627     m_bwt = bwt_type(this);
00628     {
00629         // clear sa
00630         RandomAccessContainer tmp;
00631         tmp.swap(sa);
00632     }
00633 #ifdef SDSL_DEBUG
00634     std::cerr<<"encoded psi and created; sa destroyed"<<std::endl;
00635 #endif
00636 }
00637 
00638 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00639 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::const_iterator csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::begin()const
00640 {
00641     return const_iterator(this, 0);
00642 }
00643 
00644 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00645 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::const_iterator csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::end()const
00646 {
00647     return const_iterator(this, size());
00648 }
00649 
00650 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00651 inline typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::value_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator[](size_type i)const
00652 {
00653     size_type off = 0;
00654     while (i % SampleDens) {// while i mod SampleDens != 0 (SA[i] is not sampled)   SG: auf keinen Fall get_sample_dens nehmen, ist total langsam
00655         i = m_psi[i];       // go to the position where SA[i]+1 is located
00656         ++off;              // add 1 to the offset
00657     }
00658     value_type result = m_sa_sample[i/SampleDens];
00659     if (result < off) {
00660         return m_psi.size()-(off-result);
00661     } else
00662         return result-off;
00663 }
00664 
00665 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00666 inline typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::value_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator()(size_type i)const
00667 {
00668     value_type result = m_isa_sample[i/InvSampleDens]; // get the rightmost sampled isa value
00669     i = i % InvSampleDens;
00670     while (i--) {
00671         result = m_psi[result];
00672     }
00673 //      assert(((*this)[result])==j);
00674     return result;
00675 }
00676 
00677 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00678 csa_sada<EncVector,SampleDens, InvSampleDens, fixedIntWidth>& csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator=(const csa_sada<EncVector,SampleDens, InvSampleDens, fixedIntWidth>& csa)
00679 {
00680     if (this != &csa) {
00681         copy(csa);
00682     }
00683     return *this;
00684 }
00685 
00686 
00687 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00688 typename csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::size_type csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
00689 {
00690     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00691     size_type written_bytes = 0;
00692     written_bytes += m_psi.serialize(out, child, "psi");
00693     written_bytes += m_sa_sample.serialize(out, child, "sa_samples");
00694     written_bytes += m_isa_sample.serialize(out, child, "isa_samples");
00695     written_bytes += m_char2comp.serialize(out, child, "char2comp");
00696     written_bytes += m_comp2char.serialize(out, child, "comp2char");
00697     written_bytes += m_C.serialize(out, child, "C");
00698     written_bytes += util::write_member(m_sigma, out, child, "sigma");
00699     structure_tree::add_size(child, written_bytes);
00700     return written_bytes;
00701 }
00702 
00703 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00704 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::load(std::istream& in)
00705 {
00706     m_psi.load(in);
00707     m_sa_sample.load(in);
00708     m_isa_sample.load(in);
00709     m_char2comp.load(in);
00710     m_comp2char.load(in);
00711     m_C.load(in);
00712     util::read_member(m_sigma, in);
00713     m_psi_wrapper = psi_type(this);
00714     m_bwt = bwt_type(this);
00715 }
00716 
00717 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00718 void csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::swap(csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa)
00719 {
00720     if (this != &csa) {
00721         m_psi.swap(csa.m_psi);
00722         m_sa_sample.swap(csa.m_sa_sample);
00723         m_isa_sample.swap(csa.m_isa_sample);
00724         m_char2comp.swap(csa.m_char2comp);
00725         m_comp2char.swap(csa.m_comp2char);
00726         m_C.swap(csa.m_C);
00727         std::swap(m_sigma, csa.m_sigma);
00728         m_psi_wrapper = psi_type(this);
00729         csa.m_psi_wrapper = psi_type(&csa);
00730         m_bwt = bwt_type(this);
00731         csa.m_bwt = bwt_type(&csa);
00732     }
00733 }
00734 
00735 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00736 bool csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator==(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa)const
00737 {
00738     for (uint16_t i=0; i<256; ++i)
00739         if (m_char2comp[i] != csa.m_char2comp[i] or m_comp2char[i] != csa.m_comp2char[i] or m_C[i] != csa.m_C[i])
00740             return false;
00741     return m_psi == csa.m_psi and m_sa_sample == csa.m_sa_sample and m_isa_sample == csa.m_isa_sample and m_C[256] == csa.m_C[256] and m_sigma == csa.m_sigma;
00742 }
00743 
00744 template<class EncVector, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00745 bool csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>::operator!=(const csa_sada<EncVector, SampleDens, InvSampleDens, fixedIntWidth>& csa)const
00746 {
00747     return !(*this == csa);
00748 //      return m_psi != csa.m_psi or m_sa_sample != csa.m_sa_sample or m_isa_sample != csa.m_isa_sample;
00749 }
00750 
00751 } // end namespace sdsl
00752 
00753 #endif