SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/csa_wt.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CSA_WT
00022 #define INCLUDED_SDSL_CSA_WT
00023 
00024 #ifdef SDSL_DEBUG
00025 #define SDSL_DEBUG_CSA_WT
00026 #endif
00027 
00028 #include "wt_huff.hpp"
00029 #include "algorithms.hpp"
00030 #include "iterators.hpp"
00031 #include "util.hpp"
00032 #include "suffixarrays.hpp"
00033 #include "bwt_construct.hpp"
00034 #include "fast_cache.hpp"
00035 #include <iostream>
00036 #include <algorithm> // for std::swap
00037 #include <cassert>
00038 #include <cstring> // for strlen
00039 #include <iomanip>
00040 #include <iterator>
00041 
00042 namespace sdsl
00043 {
00044 
00045 template<class WaveletTree = wt_huff<>, uint32_t SampleDens = 32, uint32_t InvSampleDens = 64,  uint8_t fixedIntWidth = 0, class charType=unsigned char> // forward declaration
00046 class csa_wt;
00047 
00048 template<uint8_t fixedIntWidth>
00049 struct csa_wt_trait {
00050     typedef int_vector<0> int_vector_type;
00051 };
00052 
00053 template<>
00054 struct csa_wt_trait<32> {
00055     typedef int_vector<32> int_vector_type;
00056 };
00057 
00058 template<>
00059 struct csa_wt_trait<64> {
00060     typedef int_vector<64> int_vector_type;
00061 };
00062 
00064 template<class CsaWT>
00065 class psi_of_csa_wt
00066 {
00067     public:
00068         typedef typename CsaWT::value_type value_type;
00069         typedef typename CsaWT::size_type size_type;
00070         typedef typename CsaWT::char_type char_type;
00071         typedef typename CsaWT::difference_type difference_type;
00072         typedef random_access_const_iterator<psi_of_csa_wt> const_iterator;// STL Container requirement
00073     private:
00074         CsaWT* m_csa_wt; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
00075     public:
00076 
00078         psi_of_csa_wt(CsaWT* csa_wt=NULL) {
00079             m_csa_wt = csa_wt;
00080         }
00081 
00083         psi_of_csa_wt(const psi_of_csa_wt& psi_of_csa) {
00084             m_csa_wt = psi_of_csa.m_csa_wt;
00085         }
00086 
00088 
00092         value_type operator[](size_type i)const {
00093             assert(m_csa_wt != NULL);
00094             char_type c = algorithm::get_ith_character_of_the_first_row(i, *m_csa_wt);
00095             return m_csa_wt->wavelet_tree.select(i - m_csa_wt->C[m_csa_wt->char2comp[c]] + 1 , c);
00096         }
00097 
00099 
00104         value_type psi_k(size_type i, size_type k)const {
00105             assert(m_csa_wt != NULL);
00106             if (k < m_csa_wt->sa_sample_dens + m_csa_wt->isa_sample_dens) {
00107                 for (size_type j=0; j<k; ++j) {
00108                     i = (*this)[i];
00109                 }
00110                 return i;
00111             } else {
00112                 size_type x = (*m_csa_wt)[i];
00113                 x += k;
00114                 if (x >= m_csa_wt->size()) {
00115                     x -= m_csa_wt->size();
00116                 }
00117                 return (*m_csa_wt)(x);
00118             }
00119         }
00120 
00122 
00127         value_type operator()(size_type i)const {
00128             assert(m_csa_wt != NULL);
00129             assert(i < size());
00130             // (1) get the ith character in the bwt, this takes \f$\log |\Sigma| \f$ time with use of the wavelet tree
00131 //              value_type c = m_csa_wt->m_wavelet_tree[i];
00132             // (2) next count how many symbols c are in the prefix [0..i-1] of the bwt, this takes \f$\log |\Sigma| \f$ time with use of the wavelet tree.
00133 //              size_type j = m_csa_wt->m_wavelet_tree.rank(i, c);
00134             // TODO: (1) and (2) can be calculated in only one request to the wavelet tree -> method rank_ith_symbol !!!
00135             // (3) calculate the position of the j+1 th c in the sorted string of the bwt in constant time.
00136             typename CsaWT::char_type c;
00137             size_type j = m_csa_wt->m_wavelet_tree.rank_ith_symbol(i,c); // see documentation of rank_ith_symbol in wt_huff
00138             return m_csa_wt->C[ m_csa_wt->char2comp[c] ] + j;
00139         }
00140 
00142 
00147         value_type lf_k(size_type i, size_type k)const {
00148             assert(m_csa_wt != NULL);
00149             if (k < m_csa_wt->sa_sample_dens + m_csa_wt->isa_sample_dens) {
00150                 for (size_type j=0; j<k; ++j) {
00151                     i = (*this)(i);
00152                 }
00153                 return i;
00154             } else {
00155                 size_type x = (*m_csa_wt)[i];
00156                 if (x < k) {
00157                     x += m_csa_wt->size();
00158                 }
00159                 x -= k;
00160                 return (*m_csa_wt)(x);
00161             }
00162         }
00163 
00165         psi_of_csa_wt& operator=(const psi_of_csa_wt& psi_of_csa) {
00166             if (this != &psi_of_csa) {
00167                 m_csa_wt = psi_of_csa.m_csa_wt;
00168             }
00169             return *this;
00170         }
00171 
00173 
00175         bool operator==(const psi_of_csa_wt& psi) {
00176             return true;
00177         }
00178 
00180         size_type size()const {
00181             return m_csa_wt->size();
00182         }
00183 
00185         size_type empty()const {
00186             return m_csa_wt->empty();
00187         }
00188 
00190         void swap(psi_of_csa_wt& psi_of_csa) {
00191             if (this != &psi_of_csa) {
00192                 ;// std::swap(m_csa_wt, psi_of_csa.m_csa_wt); // do not exchange the supported structure!
00193             }
00194         }
00195 
00197 
00200         const_iterator begin()const {
00201             return const_iterator(this, 0);
00202         }
00203 
00205 
00208         const_iterator end()const {
00209             return const_iterator(this, size());
00210         }
00211 
00212         // Get the number of sampled \f$\Psi\f$ values. In the wavelet tree approach the number of sampled (i.e. explicitly stored) \f$\Psi\f$ values is zero.
00213         uint32_t get_sample_dens()const {
00214             return 0;
00215         }
00216 };
00217 
00218 template<class CsaWT>
00219 class bwt_of_csa_wt
00220 {
00221     public:
00222         typedef const typename CsaWT::char_type value_type;
00223         typedef typename CsaWT::size_type size_type;
00224         typedef typename CsaWT::difference_type difference_type;
00225         typedef random_access_const_iterator<bwt_of_csa_wt> const_iterator;// STL Container requirement
00226     private:
00227         CsaWT* m_csa_wt; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
00228     public:
00229 
00231         bwt_of_csa_wt(CsaWT* csa_wt=NULL) {
00232             m_csa_wt = csa_wt;
00233         }
00234 
00236         bwt_of_csa_wt(const bwt_of_csa_wt& bwt_of_csa) {
00237             m_csa_wt = bwt_of_csa.m_csa_wt;
00238         }
00239 
00241 
00245         value_type operator[](size_type i)const {
00246             assert(m_csa_wt != NULL);
00247             assert(i < size());
00248             return m_csa_wt->m_wavelet_tree[i];
00249         }
00250 
00252         bwt_of_csa_wt& operator=(const bwt_of_csa_wt& bwt_of_csa) {
00253             if (this != &bwt_of_csa) {
00254                 m_csa_wt = bwt_of_csa.m_csa_wt;
00255             }
00256             return *this;
00257         }
00258 
00260         size_type size()const {
00261             return m_csa_wt->size();
00262         }
00263 
00265         size_type empty()const {
00266             return m_csa_wt->empty();
00267         }
00268 
00270         void swap(bwt_of_csa_wt& bwt_of_csa) {
00271             if (this != &bwt_of_csa) {
00272                 ;// std::swap(m_csa_wt, bwt_of_csa.m_csa_wt); // do not exchange the supported structure!
00273             }
00274         }
00275 
00277 
00280         const_iterator begin()const {
00281             return const_iterator(this, 0);
00282         }
00283 
00285 
00288         const_iterator end()const {
00289             return const_iterator(this, size());
00290         }
00291 };
00292 
00293 
00295 
00302 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens,  uint8_t fixedIntWidth, class charType>
00303 class csa_wt
00304 {
00305     public:
00306         typedef uint64_t                                                                                         value_type;    // STL Container requirement
00307         typedef random_access_const_iterator<csa_wt>                             const_iterator;// STL Container requirement
00308         typedef const_iterator                                                                           iterator;              // STL Container requirement
00309         typedef const value_type                                                                         const_reference;
00310         typedef const_reference                                                                          reference;
00311         typedef const_reference*                                                                         pointer;
00312         typedef const pointer                                                                            const_pointer;
00313         typedef int_vector<>::size_type                                                          size_type;             // STL Container requirement
00314         typedef size_type                                                                                        csa_size_type;
00315         typedef ptrdiff_t                                                                                        difference_type; // STL Container requirement
00316         typedef psi_of_csa_wt<csa_wt>                                                            psi_type;
00317         typedef bwt_of_csa_wt<csa_wt>                                                            bwt_type;
00318         typedef WaveletTree                                                                                      wavelet_tree_type;
00319         typedef charType                                                                                         char_type;
00320         typedef const char_type*                                                                         pattern_type;
00321         typedef typename csa_wt_trait<fixedIntWidth>::int_vector_type sa_sample_type;
00322         typedef typename csa_wt_trait<fixedIntWidth>::int_vector_type isa_sample_type;
00323 
00324         typedef csa_tag                                                                                                 index_category;
00325 
00326 
00327 
00328         enum { sa_sample_dens = SampleDens,
00329                isa_sample_dens = InvSampleDens
00330              };
00331 
00332         friend class psi_of_csa_wt<csa_wt>;
00333         friend class bwt_of_csa_wt<csa_wt>;
00334 //      static const uint32_t sample_dens = SampleDens;
00335     private:
00336         WaveletTree             m_wavelet_tree; // the wavelet tree
00337         psi_type m_psi;  // psi function
00338         bwt_type m_bwt;  // bwt
00339         sa_sample_type m_sa_sample; // suffix array samples
00340         isa_sample_type m_isa_sample; // inverse suffix array samples
00341         int_vector<8>   m_char2comp;
00342         int_vector<8>   m_comp2char;
00343 
00344         int_vector<64>  m_C; // counts for the compact alphabet [0..sigma-1]
00345         uint16_t                m_sigma;
00346 //              uint32_t m_sample_dens; // additional to SampleDens value
00347 //#define USE_CSA_CACHE
00348 #ifdef USE_CSA_CACHE
00349         mutable fast_cache csa_cache;
00350 #endif
00351 
00352         template<typename RandomAccessContainer>
00353         void construct(const RandomAccessContainer& sa, const char_type* str);
00354 
00355         template<typename RandomAccessContainer>
00356         void construct_samples(const RandomAccessContainer& sa, const char_type* str);
00357 
00358         void copy(const csa_wt& csa) {
00359             m_wavelet_tree                      = csa.m_wavelet_tree;
00360             m_sa_sample                         = csa.m_sa_sample;
00361             m_isa_sample                        = csa.m_isa_sample;
00362             m_sigma                                     = csa.m_sigma;
00363             m_char2comp                         = csa.m_char2comp;
00364             m_comp2char                         = csa.m_comp2char;
00365             m_C = csa.m_C;
00366             m_psi = psi_type(this);
00367             m_bwt = bwt_type(this);
00368         }
00369 
00370     public:
00371         const int_vector<8>& char2comp;
00372         const int_vector<8>& comp2char;
00373         const int_vector<64>& C;
00374         const uint16_t& sigma;
00375         const psi_type& psi;
00376         const bwt_type& bwt;
00377         const sa_sample_type& sa_sample;
00378         const isa_sample_type& isa_sample;
00379         const wavelet_tree_type& wavelet_tree;
00380 
00382         csa_wt():char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi), bwt(m_bwt),sa_sample(m_sa_sample), isa_sample(m_isa_sample), wavelet_tree(m_wavelet_tree) {}
00384         ~csa_wt() {}
00386         csa_wt(const csa_wt& csa): m_sa_sample(csa.m_sa_sample), m_isa_sample(csa.m_isa_sample),  char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample), wavelet_tree(m_wavelet_tree) {
00387             copy(csa);
00388         }
00389 
00391         template<typename RandomAccessContainer>
00392         csa_wt(const RandomAccessContainer& sa, const char_type* str);
00393 
00395         csa_wt(const char_type* str);
00396 
00398         template<class size_type_class, uint8_t int_width, class size_type_class_1>
00399         csa_wt(int_vector_file_buffer<8, size_type_class>& bwt_buf,
00400                int_vector_file_buffer<int_width, size_type_class_1>& sa_buf
00401               );
00402 
00404         template<class size_type_class>
00405         csa_wt(int_vector_file_buffer<8, size_type_class>& bwt_buf);
00406 
00407         csa_wt(tMSS& file_map, const std::string& dir, const std::string& id);
00408 
00409         void construct(tMSS& file_map, const std::string& dir, const std::string& id);
00410 
00412 
00417         size_type size()const {
00418             return m_wavelet_tree.size();
00419         }
00420 
00422 
00425         static size_type max_size() {
00426             return bit_vector::max_size();
00427         }
00428 
00430 
00433         bool empty()const {
00434             return m_wavelet_tree.empty();
00435         }
00436 
00438 
00446         void swap(csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>& csa);
00447 
00449 
00452         const_iterator begin()const;
00453 
00455 
00458         const_iterator end()const;
00459 
00461 
00467         inline value_type operator[](size_type i)const;
00468 
00470 
00475         inline value_type operator()(size_type i)const;
00476 
00478 
00481         csa_wt& operator=(const csa_wt& csa);
00482 
00484 
00489         bool operator==(const csa_wt& csa)const;
00490 
00492 
00497         bool operator!=(const csa_wt& csa)const;
00498 
00500 
00503         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const;
00504 
00506 
00508         void load(std::istream& in);
00509 
00510 //              uint32_t get_sample_dens() const;
00511 //              void set_sample_dens(const uint32_t sample_dens);
00512 
00513         uint32_t get_psi_sample_dens() const;
00514         void set_psi_sample_dens(const uint32_t sample_dens);
00515 
00517 
00524         size_type rank_bwt(size_type i, const char_type c)const {
00525             return m_wavelet_tree.rank(i, c);
00526         }
00527 
00529 
00536         size_type select_bwt(size_type i, const char_type c)const {
00537             assert(i > 0);
00538             char_type cc = m_char2comp[c];
00539             if (cc==0 and c!=0)  // character is not in the text => return size()
00540                 return size();
00541             assert(cc != 255);
00542             if (m_C[cc]+i-1 <  m_C[cc+1]) {
00543                 return m_wavelet_tree.select(i, c);
00544             } else
00545                 return size();
00546         }
00547 };
00548 
00549 // == template functions ==
00550 
00551 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00552 csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::csa_wt(const char_type* str):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa_sample), isa_sample(m_isa_sample), wavelet_tree(m_wavelet_tree)
00553 {
00554     csa_uncompressed sa(str);
00555     /*  size_type n = strlen((const char*)str);
00556         int_vector<> sa(n+1, 0, bit_magic::l1BP(n+1)+1);
00557         algorithm::calculate_sa(str, n+1, sa);   // calculate the suffix array sa of str
00558         assert(sa.size() == n+1);
00559     */
00560     algorithm::set_text<csa_wt>(str, sa.size(), m_C, m_char2comp, m_comp2char, m_sigma);
00561     construct(sa, str);
00562 //      if( n+1 > 0 and n+1 != size() )
00563 //              throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size!");
00564 }
00565 
00566 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00567 template<typename RandomAccessContainer>
00568 csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::csa_wt(const RandomAccessContainer& sa, const char_type* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt),sa_sample(m_sa_sample), isa_sample(m_isa_sample),wavelet_tree(m_wavelet_tree)
00569 {
00570     size_type n = 1;
00571     if (str != NULL) {
00572         n = strlen((const char*)str);
00573     }
00574     algorithm::set_text<csa_wt>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma);
00575     assert(sa.size() == n+1);
00576     construct(sa, str);
00577     if (n+1 > 0 and n+1 != size())
00578         throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size! ");
00579 }
00580 
00581 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00582 template<class size_type_class, uint8_t int_width, class size_type_class_1>
00583 csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::csa_wt(int_vector_file_buffer<8, size_type_class>& bwt_buf,
00584         int_vector_file_buffer<int_width, size_type_class_1>& sa_buf):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt),sa_sample(m_sa_sample),isa_sample(m_isa_sample),wavelet_tree(m_wavelet_tree)
00585 {
00586     bwt_buf.reset(); sa_buf.reset();
00587     size_type n = bwt_buf.int_vector_size;
00588     algorithm::set_text<csa_wt>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00589     assert(sa_buf.int_vetor_size == n);
00590 
00591 //      m_wavelet_tree = WaveletTree(bwt_buf, n);
00592     m_wavelet_tree.construct(bwt_buf, n);
00593 
00594     algorithm::set_sa_and_isa_samples<csa_wt>(sa_buf, m_sa_sample, m_isa_sample);
00595 
00596     m_psi = psi_type(this);
00597     m_bwt = bwt_type(this);
00598 }
00599 
00600 
00601 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00602 template<class size_type_class>
00603 csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::csa_wt(int_vector_file_buffer<8, size_type_class>& bwt_buf):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt),sa_sample(m_sa_sample),isa_sample(m_isa_sample),wavelet_tree(m_wavelet_tree)
00604 {
00605     if (SampleDens != 0 or InvSampleDens !=0) {
00606         std::cerr << "Warning: Construct only the BWT part of the CSA, please make sure SampleDens and InvSampleDens equal 0!" << std::endl;
00607     }
00608     bwt_buf.reset();
00609     size_type n = bwt_buf.int_vector_size;
00610     algorithm::set_text<csa_wt>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00611 
00612     m_wavelet_tree.construct(bwt_buf, n);
00613 
00614     m_psi = psi_type(this);
00615     m_bwt = bwt_type(this);
00616 }
00617 
00618 
00619 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00620 csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::csa_wt(tMSS& file_map, const std::string& dir, const std::string& id):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt),sa_sample(m_sa_sample),isa_sample(m_isa_sample),wavelet_tree(m_wavelet_tree)
00621 {
00622     construct(file_map, dir, id);
00623 }
00624 
00625 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00626 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::construct(tMSS& file_map, const std::string& dir, const std::string& id)
00627 {
00628     if (file_map.find("bwt") == file_map.end()) { // if bwt is not already stored on disk => construct bwt
00629         construct_bwt(file_map, dir, id);
00630 //              construct_bwt2(file_map, dir, id);
00631     }
00632     int_vector_file_buffer<8> bwt_buf(file_map["bwt"].c_str());
00633     int_vector_file_buffer<>  sa_buf(file_map["sa"].c_str());
00634     size_type n = bwt_buf.int_vector_size;
00635     algorithm::set_text<csa_wt>(bwt_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00636 //      m_wavelet_tree = WaveletTree(bwt_buf, n);
00637     write_R_output("csa", "construct WT", "begin", 1, 0);
00638     m_wavelet_tree.construct(bwt_buf, n);
00639     write_R_output("csa", "construct WT", "end", 1, 0);
00640 
00641     algorithm::set_sa_and_isa_samples<csa_wt>(sa_buf, m_sa_sample, m_isa_sample);
00642 
00643     m_psi = psi_type(this);
00644     m_bwt = bwt_type(this);
00645 }
00646 
00647 /*
00648 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00649 uint32_t csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth>::get_sample_dens()const{
00650         if(SampleDens==0)
00651                 return m_sample_dens;
00652         else
00653                 return SampleDens;
00654 }
00655 
00656 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00657 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth>::set_sample_dens(const uint32_t sample_dens){
00658         m_sample_dens = sample_dens;
00659 }
00660 */
00661 
00662 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00663 uint32_t csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::get_psi_sample_dens()const
00664 {
00665     return m_psi.get_sample_dens();
00666 }
00667 
00668 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00669 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::set_psi_sample_dens(const uint32_t sample_dens)
00670 {
00671 }
00672 
00673 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00674 template<typename RandomAccessContainer>
00675 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::construct_samples(const RandomAccessContainer& sa, const char_type* str)
00676 {
00677     m_sa_sample.set_int_width(bit_magic::l1BP(sa.size())+1);
00678     m_sa_sample.resize((sa.size()+SampleDens-1)/SampleDens);
00679     typename RandomAccessContainer::size_type i=0, idx=0;
00680     for (typename RandomAccessContainer::const_iterator it = sa.begin(); i < sa.size(); it += (difference_type)SampleDens, i += SampleDens, ++idx) {
00681         m_sa_sample[idx] = *it;
00682     }
00683     m_isa_sample.set_int_width(bit_magic::l1BP(sa.size())+1);
00684     m_isa_sample.resize((sa.size()+(InvSampleDens)-1)/(InvSampleDens));
00685     i = 0;
00686     for (typename RandomAccessContainer::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i) {
00687         if ((*it % InvSampleDens) == 0) {
00688             m_isa_sample[*it/InvSampleDens] = i;
00689         }
00690     }
00691 }
00692 
00693 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00694 template<typename RandomAccessContainer>
00695 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::construct(const RandomAccessContainer& sa, const char_type* str)
00696 {
00697     construct_samples(sa, str);
00698     const typename RandomAccessContainer::size_type n = sa.size();
00699     typename RandomAccessContainer::size_type i=0;
00700     if (str == NULL) {
00701         throw std::logic_error(util::demangle(typeid(this).name())+": text is needed to construct the csa_wt!");
00702     } else {
00703         char_type* _bwt = new char_type[n+1];
00704         i = 0;
00705         assert(str[n-1]=='\0');
00706         // TODO: %-operation is slow, replace by to_add[] lookup
00707         for (typename RandomAccessContainer::const_iterator it = sa.begin(), end = sa.end(); it != end; ++it, ++i) {
00708             _bwt[i] = str[(*it+n-1)%(n)];
00709         }
00710         m_wavelet_tree = WaveletTree(_bwt, n);
00711         delete [] _bwt;
00712 
00713         m_psi = psi_type(this);
00714         m_bwt = bwt_type(this);
00715     }
00716 }
00717 
00718 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00719 typename csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::const_iterator csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::begin()const
00720 {
00721     return const_iterator(this, 0);
00722 }
00723 
00724 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00725 typename csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::const_iterator csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::end()const
00726 {
00727     return const_iterator(this, size());
00728 }
00729 
00730 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00731 inline typename csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::value_type csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::operator[](size_type i)const
00732 {
00733     size_type off = 0;
00734     while (i % SampleDens) {
00735         i = m_psi(i);
00736         ++off;
00737     }
00738     value_type result = m_sa_sample[i/SampleDens];
00739     if (result + off < size()) {
00740         return result + off;
00741     } else {
00742         return result + off - size();
00743     }
00744     /*
00745     #ifdef USE_CSA_CACHE
00746         size_type r = 0;
00747         if( csa_cache.exists(i, r) ){
00748                 return r;
00749         }
00750     #endif
00751         size_type off = 0;
00752         while( i % SampleDens ){// while i mod SampleDens != 0 (SA[i] is not sampled)
00753                 i = m_psi[i];       // go to the position where SA[i]+1 is located
00754                 ++off;              // add 1 to the offset
00755         }
00756         value_type result = m_sa_sample[i/SampleDens];
00757         // TODO: try LF-function for iteration
00758         if( result < off ){
00759     #ifdef USE_CSA_CACHE
00760                 r = m_psi.size()-(off-result);
00761                 return r;
00762     #endif
00763                 return m_psi.size()-(off-result);
00764         }
00765         else{
00766     #ifdef USE_CSA_CACHE
00767                 r = result-off;
00768                 return r;
00769     #endif
00770                 return result-off;
00771         }
00772     */
00773 }
00774 
00775 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00776 inline typename csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::value_type csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::operator()(size_type i)const
00777 {
00778     size_type ii;
00779     value_type result = m_isa_sample[ ii = ((i+InvSampleDens-1)/InvSampleDens) ]; // get the leftmost sampled isa value to the right of i
00780     ii *= InvSampleDens;
00781     if (ii >= size()) {
00782         i = size() - 1 - i;
00783     } else {
00784         i = ii - i;
00785     }
00786     while (i--) {
00787         result = m_psi(result);
00788     }
00789     return result;
00790 }
00791 
00792 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00793 csa_wt<WaveletTree,SampleDens, InvSampleDens, fixedIntWidth, charType>& csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::operator=(const csa_wt<WaveletTree,SampleDens, InvSampleDens, fixedIntWidth, charType>& csa)
00794 {
00795     if (this != &csa) {
00796         copy(csa);
00797     }
00798     return *this;
00799 }
00800 
00801 
00802 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00803 typename csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::size_type csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
00804 {
00805     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00806     size_type written_bytes = 0;
00807     written_bytes += m_wavelet_tree.serialize(out, child, "wavelet_tree");
00808     written_bytes += m_sa_sample.serialize(out, child, "sa_samples");
00809     written_bytes += m_isa_sample.serialize(out, child, "isa_samples");
00810     written_bytes += m_char2comp.serialize(out, child, "char2comp");
00811     written_bytes += m_comp2char.serialize(out, child, "comp2char");
00812     written_bytes += m_C.serialize(out, child, "C");
00813     written_bytes += util::write_member(m_sigma, out, child, "sigma");
00814     structure_tree::add_size(child, written_bytes);
00815     return written_bytes;
00816 }
00817 
00818 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00819 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::load(std::istream& in)
00820 {
00821     m_wavelet_tree.load(in);
00822     m_sa_sample.load(in);
00823     m_isa_sample.load(in);
00824     m_char2comp.load(in);
00825     m_comp2char.load(in);
00826     m_C.load(in);
00827     util::read_member(m_sigma, in);
00828     m_psi = psi_type(this);
00829     m_bwt = bwt_type(this);
00830 }
00831 
00832 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth, class charType>
00833 void csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>::swap(csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth, charType>& csa)
00834 {
00835     if (this != &csa) {
00836         m_wavelet_tree.swap(csa.m_wavelet_tree);
00837         m_sa_sample.swap(csa.m_sa_sample);
00838         m_isa_sample.swap(csa.m_isa_sample);
00839         m_char2comp.swap(csa.m_char2comp);
00840         m_comp2char.swap(csa.m_comp2char);
00841         m_C.swap(csa.m_C);
00842         std::swap(m_sigma, csa.m_sigma);
00843 //        m_psi.swap(csa.m_psi);
00844         m_psi = psi_type(this);
00845         csa.m_psi = psi_type(&csa);
00846         m_bwt = bwt_type(this);
00847         csa.m_bwt = bwt_type(&csa);
00848     }
00849 }
00850 
00851 /* TODO
00852 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00853 bool csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth>::operator==(const csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth> &csa)const{
00854         for(uint16_t i=0;i<256;++i)
00855                 if(m_char2comp[i] != csa.m_char2comp[i] or m_comp2char[i] != csa.m_comp2char[i] or m_C[i] != csa.m_C[i])
00856                         return false;
00857         return m_psi == csa.m_psi and m_sa_sample == csa.m_sa_sample and m_isa_sample == csa.m_isa_sample and m_C[256] == csa.m_C[256] and m_sigma == csa.m_sigma;
00858 }
00859 
00860 template<class WaveletTree, uint32_t SampleDens, uint32_t InvSampleDens, uint8_t fixedIntWidth>
00861 bool csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth>::operator!=(const csa_wt<WaveletTree, SampleDens, InvSampleDens, fixedIntWidth> &csa)const{
00862         return !(*this == csa);
00863 //      return m_psi != csa.m_psi or m_sa_sample != csa.m_sa_sample or m_isa_sample != csa.m_isa_sample;
00864 }
00865 */
00866 
00867 } // end namespace sdsl
00868 
00869 #endif