SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2010 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_WT_HUFF 00022 #define INCLUDED_SDSL_WT_HUFF 00023 00024 #include "int_vector.hpp" 00025 #include "rank_support_v.hpp" 00026 #include "rank_support_v5.hpp" 00027 #include "select_support_mcl.hpp" 00028 #include "rrr_vector.hpp" 00029 #include "bitmagic.hpp" 00030 #include "util.hpp" 00031 #include "wt_helper.hpp" 00032 #include "wt.hpp" 00033 #include <algorithm> // for std::swap 00034 #include <stdexcept> 00035 #include <vector> 00036 #include <utility> // for pair 00037 #include <deque> 00038 #include <queue> 00039 00040 00041 #ifdef SDSL_DEBUG 00042 #define SDSL_DEBUG_WT_HUFF 00043 #endif 00044 00045 00046 //#define SDSL_DEBUG_WAVELET_TREE 00047 00048 #ifdef SDSL_DEBUG_WAVELET_TREE 00049 #include "testutils.hpp" 00050 #endif 00051 00053 namespace sdsl 00054 { 00055 00056 const int_vector<>::size_type ZoO[2] = {0, (int_vector<>::size_type)-1}; 00057 00058 template<class size_type> 00059 struct _node { 00060 size_type tree_pos; // pointer into the bit_vector, which represents the wavelet tree 00061 size_type tree_pos_rank; // pre-calculated rank for the prefix up to but not including tree_pos 00062 uint16_t parent; // pointer to the parent 00063 uint16_t child[2]; // pointer to the children 00064 00065 _node(size_type tree_pos=0, size_type tree_pos_rank=0, uint16_t parent=_undef_node, 00066 uint16_t child_left=_undef_node, uint16_t child_right=_undef_node): 00067 tree_pos(tree_pos), tree_pos_rank(tree_pos_rank), parent(parent) { 00068 child[0] = child_left; 00069 child[1] = child_right; 00070 } 00071 00072 _node& operator=(const _node& v) { 00073 if (this != &v) { 00074 tree_pos = v.tree_pos; 00075 tree_pos_rank = v.tree_pos_rank; 00076 parent = v.parent; 00077 child[0] = v.child[0]; 00078 child[1] = v.child[1]; 00079 } 00080 return *this; 00081 } 00082 00083 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00084 structure_tree_node* st_child = structure_tree::add_child(v, name, util::class_name(*this)); 00085 size_type written_bytes = 0; 00086 written_bytes += util::write_member(tree_pos, out); 00087 written_bytes += util::write_member(tree_pos_rank, out); 00088 written_bytes += util::write_member(parent, out); 00089 out.write((char*)child, 2*sizeof(child[0])); 00090 written_bytes += 2*sizeof(child[0]); 00091 structure_tree::add_size(st_child, written_bytes); 00092 return written_bytes; 00093 } 00094 00095 void load(std::istream& in) { 00096 util::read_member(tree_pos, in); 00097 util::read_member(tree_pos_rank, in); 00098 util::read_member(parent, in); 00099 in.read((char*) child, 2*sizeof(child[0])); 00100 } 00101 }; 00102 00104 00121 template<class BitVector = bit_vector, 00122 class RankSupport = typename BitVector::rank_1_type, 00123 class SelectSupport = typename BitVector::select_1_type, 00124 class SelectSupportZero = typename BitVector::select_0_type, 00125 bool dfs_shape=0 > 00126 class wt_huff 00127 { 00128 public: 00129 typedef int_vector<>::size_type size_type; 00130 typedef unsigned char value_type; 00131 typedef BitVector bit_vector_type; 00132 typedef RankSupport rank_1_type; 00133 typedef SelectSupport select_1_type; 00134 typedef SelectSupportZero select_0_type; 00135 00136 private: 00137 #ifdef WT_HUFF_CACHE 00138 mutable value_type m_last_access_answer; 00139 mutable size_type m_last_access_i; 00140 mutable size_type m_last_access_rl; 00141 #endif 00142 00143 size_type m_size; 00144 size_type m_sigma; //<- \f$ |\Sigma| \f$ 00145 bit_vector_type m_tree; // bit vector to store the wavelet tree 00146 RankSupport m_tree_rank; // rank support for the wavelet tree bit vector 00147 SelectSupport m_tree_select1; // select support for the wavelet tree bit vector 00148 SelectSupportZero m_tree_select0; 00149 00150 _node<size_type> m_nodes[511]; // nodes for the Huffman tree structure 00151 uint16_t m_c_to_leaf[256];// map symbol c to a leaf in the tree structure 00152 // if m_c_to_leaf[c] == _undef_node the char does 00153 // not exists in the text 00154 uint64_t m_path[256]; // path information for each char; the bits at position 00155 // 0..55 hold path information; bits 56..63 the length 00156 // of the path in binary representation 00157 // wavelet_tree<> m_check; 00158 00159 typedef std::pair<size_type, size_type> tPII; // pair (frequency, node_number) for constructing the Huffman tree 00160 typedef std::priority_queue<tPII, std::vector<tPII>, std::greater<tPII> > tMPQPII; // minimum priority queue 00161 00162 void copy(const wt_huff& wt) { 00163 m_size = wt.m_size; 00164 m_sigma = wt.m_sigma; 00165 m_tree = wt.m_tree; 00166 m_tree_rank = wt.m_tree_rank; 00167 m_tree_rank.set_vector(&m_tree); 00168 m_tree_select1 = wt.m_tree_select1; 00169 m_tree_select1.set_vector(&m_tree); 00170 m_tree_select0 = wt.m_tree_select0; 00171 m_tree_select0.set_vector(&m_tree); 00172 for (size_type i=0; i < 511; ++i) 00173 m_nodes[i] = wt.m_nodes[i]; 00174 for (size_type i=0; i<256; ++i) 00175 m_c_to_leaf[i] = wt.m_c_to_leaf[i]; 00176 for (size_type i=0; i<256; ++i) { 00177 m_path[i] = wt.m_path[i]; 00178 } 00179 // m_check = wt.m_check; 00180 } 00181 00182 // insert a character into the wavelet tree, see constuct method 00183 void insert_char(uint8_t old_chr, size_type* tree_pos, size_type times, bit_vector& f_tree) { 00184 uint32_t path_len = (m_path[old_chr]>>56); 00185 uint64_t p = m_path[old_chr]; 00186 for (uint32_t node=0, l=0; l<path_len; ++l, p >>= 1) { 00187 if (p&1) { 00188 f_tree.set_int(tree_pos[node], 0xFFFFFFFFFFFFFFFFULL,times); 00189 } 00190 tree_pos[node] += times; 00191 node = m_nodes[node].child[p&1]; 00192 } 00193 } 00194 00195 // calculates the Huffman tree and returns the size of the WT bit vector 00196 size_type construct_huffman_tree(size_type* C) { 00197 tMPQPII pq; // priority queue 00198 std::vector<_node<size_type> > temp_nodes(2*m_sigma-1); // vector for nodes of the Huffman tree 00199 size_type node_cnt=0; // counter for the nodes 00200 for (size_type i=0; i < 256; ++i) // add leafs of Huffman tree 00201 if (C[i] > 0) { 00202 pq.push(tPII(C[i], node_cnt)); // push (frequency, pointer to node) 00203 temp_nodes[node_cnt++] = _node<size_type>(C[i], i); // initial tree_pos with number of occurences 00204 // and tree_pos_rank value with the code of the corresponding char 00205 // parent, child[0], and child[1] are set to _undef_node 00206 } 00207 while (pq.size() > 1) { 00208 tPII v1, v2; 00209 v1 = pq.top(); pq.pop(); 00210 v2 = pq.top(); pq.pop(); 00211 temp_nodes[ v1.second ].parent = node_cnt; // parent is new node 00212 temp_nodes[ v2.second ].parent = node_cnt; // parent is new node 00213 size_type frq_sum = v1.first + v2.first; 00214 pq.push(tPII(frq_sum, node_cnt)); // push new node to the priority queue 00215 temp_nodes[ node_cnt++ ] = _node<size_type>(frq_sum, 0, _undef_node, v1.second, v2.second); 00216 } 00217 // Convert Huffman tree into breadth first search order in memory and 00218 // calculate tree_pos values 00219 m_nodes[0] = temp_nodes[node_cnt-1]; // insert root at index 0 00220 size_type tree_size = 0; 00221 node_cnt = 1; 00222 uint16_t last_parent = _undef_node; 00223 std::deque<size_type> q; 00224 q.push_back(0); 00225 while (!q.empty()) { 00226 size_type idx; 00227 if (!dfs_shape) { 00228 idx = q.front(); q.pop_front(); 00229 } else { 00230 idx = q.back(); q.pop_back(); 00231 } 00232 size_type frq = m_nodes[idx].tree_pos; // frq_sum was stored in tree_pos 00233 m_nodes[idx].tree_pos = tree_size; 00234 if (m_nodes[idx].child[0] != _undef_node) // if node is not a leaf 00235 tree_size += frq; // add frequency, as leaves have size 0 00236 if (idx > 0) { // node is not the root 00237 if (last_parent != m_nodes[idx].parent) 00238 m_nodes[m_nodes[idx].parent].child[0] = idx; 00239 else 00240 m_nodes[m_nodes[idx].parent].child[1] = idx; 00241 last_parent = m_nodes[idx].parent; 00242 } 00243 if (m_nodes[idx].child[0] != _undef_node) { // if node is not a leaf 00244 for (size_type k=0; k<2; ++k) { // add children to tree 00245 m_nodes[node_cnt] = temp_nodes[ m_nodes[idx].child[k] ]; 00246 m_nodes[node_cnt].parent = idx; 00247 q.push_back(node_cnt); 00248 m_nodes[idx].child[k] = node_cnt++; 00249 } 00250 } 00251 } 00252 00253 // initialize m_c_to_leaf 00254 for (size_type i=0; i<256; ++i) 00255 m_c_to_leaf[i] = _undef_node; // if c is not in the alphabet m_c_to_leaf[c] = _undef_node 00256 for (size_type i=0; i < 2*sigma-1; ++i) { 00257 if (m_nodes[i].child[0] == _undef_node) // if node is a leaf 00258 m_c_to_leaf[(uint8_t)m_nodes[i].tree_pos_rank] = i; // calculate value 00259 } 00260 // initialize path information 00261 // Note: In the case of a bfs search order, 00262 // we can classify nodes as rigth child and left child with an easy criterion: 00263 // node is a left child, if node%2==1 00264 // node is a rigth child, if node%2==0 00265 for (size_type c=0; c<256; ++c) { 00266 if (m_c_to_leaf[c] != _undef_node) { // if char exists in the alphabet 00267 size_type node = m_c_to_leaf[c]; 00268 uint64_t w = 0; // path 00269 uint64_t l = 0; // path len 00270 while (node != 0) { // while node is not the root 00271 w <<= 1; 00272 // if( (node & 1) == 0 )// if the node is a right child 00273 // w |= 1ULL; 00274 if (m_nodes[m_nodes[node].parent].child[1] == node) 00275 w |= 1ULL; 00276 ++l; 00277 node = m_nodes[node].parent; // go up the tree 00278 } 00279 if (l > 56) { 00280 std::cerr<<"Huffman tree has max depth > 56!!! ERROR"<<std::endl; 00281 throw std::logic_error("Huffman tree size is greater than 56!!!"); 00282 } 00283 m_path[c] = w | (l << 56); 00284 } else { 00285 m_path[c] = 0; // i.e. len is also 0, good for special case in rank() 00286 } 00287 } 00288 return tree_size; 00289 } 00290 00291 void construct_init_rank_select() { 00292 util::init_support(m_tree_rank, &m_tree); 00293 util::init_support(m_tree_select0, &m_tree); 00294 util::init_support(m_tree_select1, &m_tree); 00295 } 00296 00297 void construct_precalc_node_ranks() { 00298 for (size_type i=0; i<2*m_sigma-1; ++i) { 00299 if (m_nodes[i].child[0] != _undef_node) // if node is not a leaf 00300 m_nodes[i].tree_pos_rank = m_tree_rank(m_nodes[i].tree_pos); 00301 } 00302 } 00303 00304 00305 // recursive internal version of the method interval_symbols 00306 void _interval_symbols(size_type i, size_type j, size_type& k, 00307 std::vector<unsigned char>& cs, 00308 std::vector<size_type>& rank_c_i, 00309 std::vector<size_type>& rank_c_j, uint16_t node) const { 00310 // invariant: j>i 00311 // goto right child 00312 size_type i_new = (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank); 00313 size_type j_new = (m_tree_rank(m_nodes[node].tree_pos + j) - m_nodes[node].tree_pos_rank); 00314 if (i_new!=j_new) { 00315 uint16_t node_new = m_nodes[node].child[1]; 00316 // if node is not a leaf 00317 if (m_nodes[node_new].child[0] != _undef_node) { 00318 _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, node_new); 00319 } else { 00320 rank_c_i[k] = i_new; 00321 rank_c_j[k] = j_new; 00322 cs[k++] = m_nodes[node_new].tree_pos_rank; 00323 } 00324 } 00325 // goto left child 00326 i -= i_new; j -= j_new; 00327 if (i != j) { 00328 uint16_t node_new = m_nodes[node].child[0]; 00329 // if node is not a leaf 00330 if (m_nodes[node_new].child[0] != _undef_node) { 00331 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, node_new); 00332 } else { 00333 rank_c_i[k] = i; 00334 rank_c_j[k] = j; 00335 cs[k++] = m_nodes[node_new].tree_pos_rank; 00336 } 00337 } 00338 } 00339 00340 00341 public: 00342 00343 const size_type& sigma; 00344 const bit_vector_type& tree; 00345 00346 // Default constructor 00347 wt_huff():m_size(0),m_sigma(0), sigma(m_sigma),tree(m_tree) {}; 00348 00349 00350 00352 00358 template<typename RandomAccessContainer> 00359 wt_huff(const RandomAccessContainer& rac, size_type size):m_size(size), m_sigma(0), sigma(m_sigma), tree(m_tree) { 00360 construct(rac, size); 00361 } 00362 00363 template<uint8_t w> 00364 wt_huff(const int_vector<w>& rac):m_size(rac.size()), m_sigma(0), sigma(m_sigma), tree(m_tree) { 00365 construct(rac, rac.size()); 00366 } 00367 00368 00369 template<typename RandomAccessContainer> 00370 void construct(const RandomAccessContainer& rac, size_type size) { 00371 m_size = size; 00372 if (m_size == 0) 00373 return; 00374 // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes 00375 size_type C[256] = {0}; 00376 // 1. Count occurences of characters 00377 for (size_type i=0; i < size; ++i) { 00378 ++C[rac[i]]; 00379 } 00380 // 2. Calculate effective alphabet size 00381 calculate_effective_alphabet_size(C, m_sigma); 00382 // 3. Generate Huffman tree 00383 size_type tree_size = construct_huffman_tree(C); 00384 // 4. Generate wavelet tree bit sequence m_tree 00385 00386 bit_vector tmp_tree(tree_size, 0); // initialize bit_vector for the tree 00387 // Calculate starting position of wavelet tree nodes 00388 size_type tree_pos[511]; 00389 for (size_type i=0; i < 2*sigma-1; ++i) { 00390 tree_pos[i] = m_nodes[i].tree_pos; 00391 } 00392 uint8_t old_chr = rac[0], times = 0; 00393 for (size_type i=0; i < m_size; ++i) { 00394 uint8_t chr = rac[i]; 00395 if (chr != old_chr) { 00396 insert_char(old_chr, tree_pos, times, tmp_tree); 00397 times = 1; 00398 old_chr = chr; 00399 } else { // chr == old_chr 00400 ++times; 00401 if (times == 64) { 00402 insert_char(old_chr, tree_pos, times, tmp_tree); 00403 times = 0; 00404 } 00405 } 00406 } 00407 if (times > 0) { 00408 insert_char(old_chr, tree_pos, times, tmp_tree); 00409 } 00410 util::assign(m_tree, tmp_tree); 00411 // 5. Initialize rank and select data structures for m_tree 00412 construct_init_rank_select(); 00413 // 6. Finish inner nodes by precalculating the tree_pos_rank values 00414 construct_precalc_node_ranks(); 00415 } 00416 00417 template<class size_type_class> 00418 wt_huff(int_vector_file_buffer<8, size_type_class>& rac, size_type size):m_size(size), m_sigma(0), sigma(m_sigma), tree(m_tree) { 00419 construct(rac, size); 00420 } 00421 00423 00426 template<class size_type_class> 00427 void construct(int_vector_file_buffer<8, size_type_class>& rac, size_type size) { 00428 // m_check.construct(rac, size); 00429 m_size = size; 00430 if (m_size == 0) 00431 return; 00432 // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes 00433 size_type C[256] = {0}; 00434 // 1. Count occurrences of characters 00435 calculate_character_occurences(rac, m_size, C); 00436 // 2. Calculate effective alphabet size 00437 calculate_effective_alphabet_size(C, m_sigma); 00438 // 3. Generate Huffman tree 00439 size_type tree_size = construct_huffman_tree(C); 00440 // 4. Generate wavelet tree bit sequence m_tree 00441 00442 bit_vector tmp_tree(tree_size, 0); // initialize bit_vector for the tree 00443 // Calculate starting position of wavelet tree nodes 00444 size_type tree_pos[511]; 00445 for (size_type i=0; i < 2*sigma-1; ++i) { 00446 tree_pos[i] = m_nodes[i].tree_pos; 00447 } 00448 rac.reset(); 00449 if (rac.int_vector_size < size) { 00450 throw std::logic_error("wt_huff::construct: stream size is smaller than size!"); 00451 return; 00452 } 00453 for (size_type i=0, r_sum=0, r = rac.load_next_block(); r_sum < m_size;) { 00454 if (r_sum + r > size) { // read not more than size chars in the next loop 00455 r = size-r_sum; 00456 } 00457 uint8_t old_chr = rac[i-r_sum], times = 0; 00458 for (; i < r_sum+r; ++i) { 00459 uint8_t chr = rac[i-r_sum]; 00460 if (chr != old_chr) { 00461 insert_char(old_chr, tree_pos, times, tmp_tree); 00462 times = 1; 00463 old_chr = chr; 00464 } else { // chr == old_chr 00465 ++times; 00466 if (times == 64) { 00467 insert_char(old_chr, tree_pos, times, tmp_tree); 00468 times = 0; 00469 } 00470 } 00471 } 00472 if (times > 0) { 00473 insert_char(old_chr, tree_pos, times, tmp_tree); 00474 } 00475 r_sum += r; r = rac.load_next_block(); 00476 } 00477 util::assign(m_tree, tmp_tree); 00478 // 5. Initialize rank and select data structures for m_tree 00479 construct_init_rank_select(); 00480 // 6. Finish inner nodes by precalculating the tree_pos_rank values 00481 construct_precalc_node_ranks(); 00482 } 00483 00484 00486 wt_huff(const wt_huff& wt):sigma(m_sigma), tree(m_tree) { 00487 copy(wt); 00488 } 00489 00491 wt_huff& operator=(const wt_huff& wt) { 00492 if (this != &wt) { 00493 copy(wt); 00494 } 00495 return *this; 00496 } 00497 00499 void swap(wt_huff& wt) { 00500 if (this != &wt) { 00501 std::swap(m_size, wt.m_size); 00502 std::swap(m_sigma, wt.m_sigma); 00503 m_tree.swap(wt.m_tree); 00504 util::swap_support(m_tree_rank, wt.m_tree_rank, &m_tree, &(wt.m_tree)); 00505 00506 util::swap_support(m_tree_select1, wt.m_tree_select1, &m_tree, &(wt.m_tree)); 00507 util::swap_support(m_tree_select0, wt.m_tree_select0, &m_tree, &(wt.m_tree)); 00508 00509 for (size_type i=0; i < 511; ++i) 00510 std::swap(m_nodes[i], wt.m_nodes[i]); 00511 for (size_type i=0; i<256; ++i) 00512 std::swap(m_c_to_leaf[i], wt.m_c_to_leaf[i]); 00513 for (size_type i=0; i<256; ++i) 00514 std::swap(m_path[i], wt.m_path[i]); 00515 00516 // m_check.swap( wt.m_check ); 00517 } 00518 } 00519 00521 size_type size()const { 00522 return m_size; 00523 } 00524 00526 bool empty()const { 00527 return m_size == 0; 00528 } 00529 00531 00537 value_type operator[](size_type i)const { // TODO: Maybe it is good to integrate a cache here 00538 // which stores how many of the next symbols are equal 00539 // with the current char 00540 size_type node = 0; // start at root node 00541 while (m_nodes[node].child[0] != _undef_node) { // while node is not a leaf 00542 if (m_tree[ m_nodes[node].tree_pos + i]) { // goto the right child 00543 i = m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank; 00544 node = m_nodes[node].child[1]; 00545 } else { // goto the left child 00546 i -= (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank); 00547 node = m_nodes[node].child[0]; 00548 } 00549 } 00550 return m_nodes[node].tree_pos_rank; 00551 }; 00552 00554 00561 size_type rank(size_type i, value_type c)const { 00562 uint64_t p = m_path[c]; 00563 uint32_t path_len = (m_path[c]>>56); // equals zero if char was not present in the original text or m_sigma=1 00564 if (!path_len and 1 == m_sigma) { // if m_sigma == 1 return result immediately 00565 if (m_c_to_leaf[c] == _undef_node) { // if character does not exist return 0 00566 return 0; 00567 } 00568 return std::min(i, m_size); 00569 } 00570 size_type result = i & ZoO[path_len>0]; // important: result has type size_type and ZoO has type size_type 00571 uint32_t node=0; 00572 for (uint32_t l=0; l<path_len and result; ++l, p >>= 1) { 00573 // result = (ZoO[1-(p&1)]&result) - (ZoO[1-(p&1)] ^ (m_tree_rank(m_nodes[node].tree_pos + result) - m_nodes[node].tree_pos_rank) ); 00574 if (p&1) { 00575 result = (m_tree_rank(m_nodes[node].tree_pos+result) - m_nodes[node].tree_pos_rank); 00576 } else { 00577 result -= (m_tree_rank(m_nodes[node].tree_pos+result) - m_nodes[node].tree_pos_rank); 00578 } 00579 node = m_nodes[node].child[p&1]; // goto child 00580 } 00581 return result; 00582 }; 00583 00585 00592 size_type rank_ith_symbol(size_type i, value_type& c)const { 00593 // TODO: handle m_sigma=1 00594 assert(i>=0 and i < size()); 00595 uint32_t node=0; 00596 while (m_nodes[node].child[0] != _undef_node) { // while node is not a leaf 00597 if (m_tree[m_nodes[node].tree_pos + i]) { // if bit is set at position goto right child 00598 i = (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank); 00599 node = m_nodes[node].child[1]; 00600 } else { // goto left child 00601 i -= (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank); 00602 node = m_nodes[node].child[0]; 00603 } 00604 } 00605 c = m_nodes[node].tree_pos_rank; 00606 return i; 00607 } 00608 00610 00616 size_type select(size_type i, value_type c)const { 00617 uint16_t node = m_c_to_leaf[c]; 00618 if (node == _undef_node) { // if c was not present in the original text 00619 return m_size; // -> return a position right to the end 00620 } 00621 if (m_sigma == 1) { 00622 return std::min(i-1,m_size); 00623 } 00624 /* if( i == 0 ){ 00625 std::cerr<<"WARNING: i=0"<<std::endl; 00626 return m_size; 00627 } 00628 if( i > rank(size(), c) ){ 00629 std::cerr<<"WARNING: i="<<i<<" > rank(size(),"<<c<<")="<<rank(size(),c)<<std::endl; 00630 return m_size; 00631 } 00632 */ 00633 size_type result = i-1; // otherwise 00634 uint64_t p = m_path[c]; 00635 uint32_t path_len = (p>>56); 00636 p <<= (64-path_len); // Note: path_len > 0, since we have handled m_sigma = 1. 00637 for (uint32_t l=0; l<path_len; ++l, p <<= 1) { 00638 // if( node & 1 ){ // node was a left child, in the case of bfs order 00639 if ((p & 0x8000000000000000ULL)==0) { // node was a left child 00640 node = m_nodes[node].parent; 00641 result = m_tree_select0(m_nodes[node].tree_pos-m_nodes[node].tree_pos_rank + result + 1) 00642 - m_nodes[node].tree_pos; 00643 } else { // node was a right child 00644 node = m_nodes[node].parent; 00645 result = m_tree_select1(m_nodes[node].tree_pos_rank + result + 1) 00646 - m_nodes[node].tree_pos; 00647 } 00648 } 00649 /* size_type r2 = m_check.select(i, c); 00650 if( r2 != result ){ 00651 std::cerr<<"ERROR select r="<<result<<" != "<<r2<<"=r2 for input ("<<i<<","<<c<<")"<<std::endl; 00652 return r2; 00653 } 00654 */ 00655 return result; 00656 }; 00657 00658 00660 00676 void interval_symbols(size_type i, size_type j, size_type& k, 00677 std::vector<unsigned char>& cs, 00678 std::vector<size_type>& rank_c_i, 00679 std::vector<size_type>& rank_c_j) const { 00680 if (i==j) { 00681 k = 0; 00682 return; 00683 } else if ((j-i)==1) { 00684 k = 1; 00685 rank_c_i[0] = rank_ith_symbol(i, cs[0]); 00686 rank_c_j[0] = rank_c_i[0]+1; 00687 return; 00688 } else if ((j-i)==2) { 00689 rank_c_i[0] = rank_ith_symbol(i, cs[0]); 00690 rank_c_i[1] = rank_ith_symbol(i+1, cs[1]); 00691 if (cs[0]==cs[1]) { 00692 k = 1; 00693 rank_c_j[0] = rank_c_i[0]+2; 00694 return; 00695 } else { 00696 k = 2; 00697 rank_c_j[0] = rank_c_i[0]+1; 00698 rank_c_j[1] = rank_c_i[1]+1; 00699 return; 00700 } 00701 } else { 00702 k = 0; 00703 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0); 00704 } 00705 } 00706 00707 00708 00710 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00711 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00712 size_type written_bytes = 0; 00713 written_bytes += util::write_member(m_size, out, child, "size"); 00714 written_bytes += util::write_member(m_sigma, out, child, "sigma"); 00715 written_bytes += m_tree.serialize(out, child, "tree"); 00716 written_bytes += m_tree_rank.serialize(out, child, "tree_rank"); 00717 written_bytes += m_tree_select1.serialize(out, child, "tree_select_1"); 00718 written_bytes += m_tree_select0.serialize(out, child, "tree_select_0"); 00719 for (size_type i=0; i < 511; ++i) { 00720 written_bytes += m_nodes[i].serialize(out); 00721 } 00722 out.write((char*) m_c_to_leaf, 256*sizeof(m_c_to_leaf[0])); 00723 written_bytes += 256*sizeof(m_c_to_leaf[0]); // add written bytes from previous loop 00724 out.write((char*) m_path, 256*sizeof(m_path[0])); 00725 written_bytes += 256*sizeof(m_path[0]); // add written bytes from previous loop 00726 structure_tree::add_size(child, written_bytes); 00727 return written_bytes; 00728 } 00729 00731 void load(std::istream& in) { 00732 util::read_member(m_size, in); 00733 util::read_member(m_sigma, in); 00734 m_tree.load(in); 00735 m_tree_rank.load(in, &m_tree); 00736 m_tree_select1.load(in, &m_tree); 00737 m_tree_select0.load(in, &m_tree); 00738 for (size_type i=0; i < 511; ++i) { 00739 m_nodes[i].load(in); 00740 } 00741 in.read((char*) m_c_to_leaf, 256*sizeof(m_c_to_leaf[0])); 00742 in.read((char*) m_path, 256*sizeof(m_path[0])); 00743 } 00744 00745 }; 00746 00747 typedef wt_huff<rrr_vector<>, 00748 rrr_vector<>::rank_1_type, 00749 rrr_vector<>::select_1_type, 00750 rrr_vector<>::select_0_type, 0> wt_huff_rrr; 00751 00752 }// end namespace sdsl 00753 00754 #endif // end file