SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_CSA_UNCOMPRESSED 00022 #define INCLUDED_SDSL_CSA_UNCOMPRESSED 00023 00024 #include "sdsl_concepts.hpp" 00025 #include "suffixarray_helper.hpp" 00026 #include "int_vector.hpp" 00027 #include "algorithms.hpp" 00028 #include "iterators.hpp" 00029 #include "util.hpp" 00030 #include "testutils.hpp" 00031 #include <iostream> 00032 #include <algorithm> 00033 #include <cassert> 00034 #include <cstring> // for strlen 00035 #include <iomanip> 00036 #include <iterator> 00037 00038 00039 namespace sdsl 00040 { 00041 00043 00055 class csa_uncompressed 00056 { 00057 public: 00058 typedef uint64_t value_type; // STL Container requirement 00059 typedef random_access_const_iterator<csa_uncompressed> const_iterator;// STL Container requirement 00060 typedef const_iterator iterator; // STL Container requirement 00061 typedef const value_type const_reference; 00062 typedef const_reference reference; 00063 typedef const_reference* pointer; 00064 typedef const pointer const_pointer; 00065 typedef int_vector<>::size_type size_type; // STL Container requirement 00066 typedef size_type csa_size_type; 00067 typedef ptrdiff_t difference_type; // STL Container requirement 00068 typedef psi_of_sa_and_isa<csa_uncompressed> psi_type; 00069 typedef bwt_of_csa_psi<csa_uncompressed> bwt_type; 00070 typedef const unsigned char* pattern_type; 00071 typedef unsigned char char_type; 00072 typedef int_vector<> sa_sample_type; 00073 typedef int_vector<> isa_sample_type; 00074 00075 typedef csa_tag index_category; 00076 00077 enum { sa_sample_dens = 1, 00078 isa_sample_dens = 1 00079 }; 00080 00081 friend class psi_of_sa_and_isa<csa_uncompressed>; 00082 friend class bwt_of_csa_psi<csa_uncompressed>; 00083 private: 00084 sa_sample_type m_sa; // vector for suffix array values 00085 isa_sample_type m_isa; // vector for inverse suffix array values 00086 psi_type m_psi; // wrapper class for psi function values 00087 bwt_type m_bwt; // wrapper class for the BWT 00088 int_vector<8> m_char2comp; 00089 int_vector<8> m_comp2char; 00090 int_vector<64> m_C; 00091 uint16_t m_sigma; 00092 00093 void construct(); 00094 void copy(const csa_uncompressed& csa); 00095 public: 00096 const int_vector<8>& char2comp; 00097 const int_vector<8>& comp2char; 00098 const int_vector<64>& C; 00099 const uint16_t& sigma; 00100 const psi_type& psi; 00101 const bwt_type& bwt; 00102 const sa_sample_type& sa_sample; 00103 const isa_sample_type& isa_sample; 00104 00106 csa_uncompressed():char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) { 00107 util::assign(m_C, int_vector<64>(257)); 00108 util::assign(m_char2comp, int_vector<8>(256)); 00109 util::assign(m_comp2char, int_vector<8>(256)); 00110 } 00112 ~csa_uncompressed() {} 00114 csa_uncompressed(const csa_uncompressed& csa):char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) { 00115 copy(csa); 00116 } 00117 00119 template<typename RandomAccessContainer> 00120 csa_uncompressed(const RandomAccessContainer& sa, const unsigned char* str); 00121 00122 00124 csa_uncompressed(const unsigned char* str); 00125 00127 template<uint8_t int_width, class size_type_class, class size_type_class_1> 00128 csa_uncompressed(int_vector_file_buffer<8, size_type_class>& text_buf, 00129 int_vector_file_buffer<int_width, size_type_class_1>& sa_buf):char2comp(m_char2comp), 00130 comp2char(m_comp2char), C(m_C), sigma(m_sigma) ,psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) { 00131 text_buf.reset(); sa_buf.reset(); 00132 size_type n = text_buf.int_vector_size; 00133 algorithm::set_text<csa_uncompressed>(text_buf, n, m_C, m_char2comp, m_comp2char, m_sigma); 00134 algorithm::set_sa_and_isa_samples<csa_uncompressed>(sa_buf, m_sa, m_isa); 00135 m_psi = psi_type(this); 00136 m_bwt = bwt_type(this); 00137 } 00138 00139 csa_uncompressed(tMSS& file_map, const std::string& dir, const std::string& id):char2comp(m_char2comp), 00140 comp2char(m_comp2char), C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) { 00141 construct(file_map, dir, id); 00142 } 00143 00144 00145 void construct(tMSS& file_map, const std::string& dir, const std::string& id) { 00146 int_vector_file_buffer<8> text_buf(file_map["text"].c_str()); 00147 int_vector_file_buffer<> sa_buf(file_map["sa"].c_str()); 00148 size_type n = text_buf.int_vector_size; 00149 algorithm::set_text<csa_uncompressed>(text_buf, n, m_C, m_char2comp, m_comp2char, m_sigma); 00150 algorithm::set_sa_and_isa_samples<csa_uncompressed>(sa_buf, m_sa, m_isa); 00151 m_psi = psi_type(this); 00152 m_bwt = bwt_type(this); 00153 write_R_output("csa", "store ISA","begin",1,0); 00154 if (!util::store_to_file(m_isa, (dir+"isa_"+id).c_str(), true)) { 00155 throw std::ios_base::failure("#csa_uncompressed: Cannot store ISA to file system!"); 00156 } else { 00157 file_map["isa"] = dir+"isa_"+id; 00158 } 00159 write_R_output("csa", "store ISA","end",1,0); 00160 } 00161 00162 00164 00167 size_type size()const { 00168 return m_sa.size(); 00169 } 00170 00172 00175 static size_type max_size() { 00176 return int_vector<>::max_size(); 00177 } 00178 00180 00183 bool empty()const { 00184 return m_sa.empty(); 00185 } 00186 00188 00196 void swap(csa_uncompressed& csa); 00197 00199 00202 const_iterator begin()const; 00203 00205 00208 const_iterator end()const; 00209 00211 00215 inline value_type operator[](size_type i)const { 00216 return m_sa[i]; 00217 } 00218 00220 00222 inline value_type operator()(size_type i)const { 00223 return m_isa[i]; 00224 } 00225 00227 00230 csa_uncompressed& operator=(const csa_uncompressed& csa); 00231 00233 00238 bool operator==(const csa_uncompressed& csa)const; 00239 00241 00246 bool operator!=(const csa_uncompressed& csa)const; 00247 00249 00252 size_type serialize(std::ostream& out) const; 00253 00255 00257 void load(std::istream& in); 00258 00259 size_type get_sample_dens()const { 00260 return 1; 00261 } 00262 00264 00271 size_type rank_bwt(size_type i, const unsigned char c) const; 00272 00274 00281 size_type select_bwt(size_type i, const unsigned char c) const; 00282 }; 00283 00284 00285 template<typename RandomAccessContainer> 00286 csa_uncompressed::csa_uncompressed(const RandomAccessContainer& sa, const unsigned char* str):char2comp(m_char2comp), comp2char(m_comp2char),C(m_C), sigma(m_sigma), psi(m_psi), bwt(m_bwt), sa_sample(m_sa), isa_sample(m_isa) 00287 { 00288 size_type n = 1; 00289 if (str != NULL) { 00290 n = strlen((const char*)str); 00291 } 00292 algorithm::set_text<csa_uncompressed>(str, n+1, m_C, m_char2comp, m_comp2char, m_sigma); 00293 // assert(sa.size() == n+1); 00294 m_sa = int_vector<>(n+1, 0, bit_magic::l1BP(n+1)+1); 00295 for (size_type i=0; i<n+1; ++i) 00296 m_sa[i] = sa[i]; 00297 construct(); 00298 if (n+1 > 0 and n+1 != size()) 00299 throw std::logic_error(util::demangle(typeid(this).name())+": text size differ with sa size!"); 00300 } 00301 00302 00303 } // end namespace sdsl 00304 00305 #endif