SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/csa_uncompressed.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CSA_UNCOMPRESSED
00022 #define INCLUDED_SDSL_CSA_UNCOMPRESSED
00023 
00024 #include "sdsl_concepts.hpp"
00025 #include "suffixarray_helper.hpp"
00026 #include "int_vector.hpp"
00027 #include "algorithms.hpp"
00028 #include "iterators.hpp"
00029 #include "util.hpp"
00030 #include "testutils.hpp"
00031 #include <iostream>
00032 #include <algorithm>
00033 #include <cassert>
00034 #include <cstring> // for strlen
00035 #include <iomanip>
00036 #include <iterator>
00037 
00038 
00039 namespace sdsl
00040 {
00041 
00043 
00055 class csa_uncompressed
00056 {
00057     public:
00058         typedef uint64_t                                                                                         value_type;    // STL Container requirement
00059         typedef random_access_const_iterator<csa_uncompressed>           const_iterator;// STL Container requirement
00060         typedef const_iterator                                                                           iterator;              // STL Container requirement
00061         typedef const value_type                                                                         const_reference;
00062         typedef const_reference                                                                          reference;
00063         typedef const_reference*                                                                         pointer;
00064         typedef const pointer                                                                            const_pointer;
00065         typedef int_vector<>::size_type                                                          size_type;             // STL Container requirement
00066         typedef size_type                                                                                        csa_size_type;
00067         typedef ptrdiff_t                                                                                        difference_type; // STL Container requirement
00068         typedef psi_of_sa_and_isa<csa_uncompressed>                                      psi_type;
00069         typedef bwt_of_csa_psi<csa_uncompressed>                                         bwt_type;
00070         typedef const unsigned char*                                                             pattern_type;
00071         typedef unsigned char                                                                            char_type;
00072         typedef int_vector<>                                                                             sa_sample_type;
00073         typedef int_vector<>                                                                             isa_sample_type;
00074 
00075         typedef csa_tag                                                                                         index_category;
00076 
00077         enum { sa_sample_dens = 1,
00078                isa_sample_dens = 1
00079              };
00080 
00081         friend class psi_of_sa_and_isa<csa_uncompressed>;
00082         friend class bwt_of_csa_psi<csa_uncompressed>;
00083     private:
00084         sa_sample_type  m_sa;  // vector for suffix array values
00085         isa_sample_type m_isa; // vector for inverse suffix array values
00086         psi_type                m_psi; // wrapper class for psi function values
00087         bwt_type                m_bwt; // wrapper class for the BWT
00088         int_vector<8>   m_char2comp;
00089         int_vector<8>   m_comp2char;
00090         int_vector<64>  m_C;
00091         uint16_t                m_sigma;
00092 
00093         void construct();
00094         void copy(const csa_uncompressed& csa);
00095     public:
00096         const int_vector<8>& char2comp;
00097         const int_vector<8>& comp2char;
00098         const int_vector<64>& C;
00099         const uint16_t& sigma;
00100         const psi_type& psi;
00101         const bwt_type& bwt;
00102         const sa_sample_type& sa_sample;
00103         const isa_sample_type& isa_sample;
00104 
00106         csa_uncompressed():char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) {
00107             util::assign(m_C, int_vector<64>(257));
00108             util::assign(m_char2comp, int_vector<8>(256));
00109             util::assign(m_comp2char, int_vector<8>(256));
00110         }
00112         ~csa_uncompressed() {}
00114         csa_uncompressed(const csa_uncompressed& csa):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) {
00115             copy(csa);
00116         }
00117 
00119         template<typename RandomAccessContainer>
00120         csa_uncompressed(const RandomAccessContainer& sa, const unsigned char* str);
00121 
00122 
00124         csa_uncompressed(const unsigned char* str);
00125 
00127         template<uint8_t int_width, class size_type_class, class size_type_class_1>
00128         csa_uncompressed(int_vector_file_buffer<8, size_type_class>& text_buf,
00129                          int_vector_file_buffer<int_width, size_type_class_1>& sa_buf):char2comp(m_char2comp),
00130             comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) {
00131             text_buf.reset(); sa_buf.reset();
00132             size_type n = text_buf.int_vector_size;
00133             algorithm::set_text<csa_uncompressed>(text_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00134             algorithm::set_sa_and_isa_samples<csa_uncompressed>(sa_buf, m_sa, m_isa);
00135             m_psi = psi_type(this);
00136             m_bwt = bwt_type(this);
00137         }
00138 
00139         csa_uncompressed(tMSS& file_map, const std::string& dir, const std::string& id):char2comp(m_char2comp),
00140             comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) {
00141             construct(file_map, dir, id);
00142         }
00143 
00144 
00145         void construct(tMSS& file_map, const std::string& dir, const std::string& id) {
00146             int_vector_file_buffer<8> text_buf(file_map["text"].c_str());
00147             int_vector_file_buffer<>  sa_buf(file_map["sa"].c_str());
00148             size_type n = text_buf.int_vector_size;
00149             algorithm::set_text<csa_uncompressed>(text_buf, n, m_C, m_char2comp, m_comp2char, m_sigma);
00150             algorithm::set_sa_and_isa_samples<csa_uncompressed>(sa_buf, m_sa, m_isa);
00151             m_psi = psi_type(this);
00152             m_bwt = bwt_type(this);
00153             write_R_output("csa", "store ISA","begin",1,0);
00154             if (!util::store_to_file(m_isa, (dir+"isa_"+id).c_str(), true)) {
00155                 throw std::ios_base::failure("#csa_uncompressed: Cannot store ISA to file system!");
00156             } else {
00157                 file_map["isa"] = dir+"isa_"+id;
00158             }
00159             write_R_output("csa", "store ISA","end",1,0);
00160         }
00161 
00162 
00164 
00167         size_type size()const {
00168             return m_sa.size();
00169         }
00170 
00172 
00175         static size_type max_size() {
00176             return int_vector<>::max_size();
00177         }
00178 
00180 
00183         bool empty()const {
00184             return m_sa.empty();
00185         }
00186 
00188 
00196         void swap(csa_uncompressed& csa);
00197 
00199 
00202         const_iterator begin()const;
00203 
00205 
00208         const_iterator end()const;
00209 
00211 
00215         inline value_type operator[](size_type i)const {
00216             return m_sa[i];
00217         }
00218 
00220 
00222         inline value_type operator()(size_type i)const {
00223             return m_isa[i];
00224         }
00225 
00227 
00230         csa_uncompressed& operator=(const csa_uncompressed& csa);
00231 
00233 
00238         bool operator==(const csa_uncompressed& csa)const;
00239 
00241 
00246         bool operator!=(const csa_uncompressed& csa)const;
00247 
00249 
00252         size_type serialize(std::ostream& out) const;
00253 
00255 
00257         void load(std::istream& in);
00258 
00259         size_type get_sample_dens()const {
00260             return 1;
00261         }
00262 
00264 
00271         size_type rank_bwt(size_type i, const unsigned char c) const;
00272 
00274 
00281         size_type select_bwt(size_type i, const unsigned char c) const;
00282 };
00283 
00284 
00285 template<typename RandomAccessContainer>
00286 csa_uncompressed::csa_uncompressed(const RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa)
00287 {
00288     size_type n = 1;
00289     if (str != NULL) {
00290         n = strlen((const char*)str);
00291     }
00292     algorithm::set_text<csa_uncompressed>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma);
00293 //      assert(sa.size() == n+1);
00294     m_sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1);
00295     for (size_type i=0; i<n+1; ++i)
00296         m_sa[i] = sa[i];
00297     construct();
00298     if (n+1 > 0 and n+1 != size())
00299         throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size!");
00300 }
00301 
00302 
00303 } // end namespace sdsl
00304 
00305 #endif