SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/wt_huff.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_WT_HUFF
00022 #define INCLUDED_SDSL_WT_HUFF
00023 
00024 #include "int_vector.hpp"
00025 #include "rank_support_v.hpp"
00026 #include "rank_support_v5.hpp"
00027 #include "select_support_mcl.hpp"
00028 #include "rrr_vector.hpp"
00029 #include "bitmagic.hpp"
00030 #include "util.hpp"
00031 #include "wt_helper.hpp"
00032 #include "wt.hpp"
00033 #include <algorithm> // for std::swap
00034 #include <stdexcept>
00035 #include <vector>
00036 #include <utility> // for pair
00037 #include <deque>
00038 #include <queue>
00039 
00040 
00041 #ifdef SDSL_DEBUG
00042 #define SDSL_DEBUG_WT_HUFF
00043 #endif
00044 
00045 
00046 //#define SDSL_DEBUG_WAVELET_TREE
00047 
00048 #ifdef SDSL_DEBUG_WAVELET_TREE
00049 #include "testutils.hpp"
00050 #endif
00051 
00053 namespace sdsl
00054 {
00055 
00056 const int_vector<>::size_type ZoO[2] = {0, (int_vector<>::size_type)-1};
00057 
00058 template<class size_type>
00059 struct _node {
00060     size_type   tree_pos;               // pointer into the bit_vector, which represents the wavelet tree
00061     size_type   tree_pos_rank;  // pre-calculated rank for the prefix up to but not including tree_pos
00062     uint16_t    parent;                 // pointer to the parent
00063     uint16_t    child[2];               // pointer to the children
00064 
00065     _node(size_type tree_pos=0, size_type tree_pos_rank=0, uint16_t parent=_undef_node,
00066           uint16_t child_left=_undef_node, uint16_t child_right=_undef_node):
00067         tree_pos(tree_pos), tree_pos_rank(tree_pos_rank), parent(parent) {
00068         child[0] = child_left;
00069         child[1] = child_right;
00070     }
00071 
00072     _node& operator=(const _node& v) {
00073         if (this != &v) {
00074             tree_pos            = v.tree_pos;
00075             tree_pos_rank       = v.tree_pos_rank;
00076             parent                      = v.parent;
00077             child[0]            = v.child[0];
00078             child[1]            = v.child[1];
00079         }
00080         return *this;
00081     }
00082 
00083     size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00084         structure_tree_node* st_child = structure_tree::add_child(v, name, util::class_name(*this));
00085         size_type written_bytes = 0;
00086         written_bytes += util::write_member(tree_pos, out);
00087         written_bytes += util::write_member(tree_pos_rank, out);
00088         written_bytes += util::write_member(parent, out);
00089         out.write((char*)child, 2*sizeof(child[0]));
00090         written_bytes += 2*sizeof(child[0]);
00091         structure_tree::add_size(st_child, written_bytes);
00092         return written_bytes;
00093     }
00094 
00095     void load(std::istream& in) {
00096         util::read_member(tree_pos, in);
00097         util::read_member(tree_pos_rank, in);
00098         util::read_member(parent, in);
00099         in.read((char*) child, 2*sizeof(child[0]));
00100     }
00101 };
00102 
00104 
00121 template<class BitVector                 = bit_vector,
00122               class RankSupport                  = typename BitVector::rank_1_type,
00123               class SelectSupport        = typename BitVector::select_1_type,
00124               class SelectSupportZero = typename BitVector::select_0_type,
00125               bool dfs_shape=0 >
00126 class wt_huff
00127 {
00128     public:
00129         typedef int_vector<>::size_type size_type;
00130         typedef unsigned char                   value_type;
00131         typedef BitVector                               bit_vector_type;
00132         typedef RankSupport                             rank_1_type;
00133         typedef SelectSupport           select_1_type;
00134         typedef SelectSupportZero               select_0_type;
00135 
00136     private:
00137 #ifdef WT_HUFF_CACHE
00138         mutable value_type m_last_access_answer;
00139         mutable size_type  m_last_access_i;
00140         mutable size_type  m_last_access_rl;
00141 #endif
00142 
00143         size_type                       m_size;
00144         size_type                       m_sigma;                //<- \f$ |\Sigma| \f$
00145         bit_vector_type         m_tree;                 // bit vector to store the wavelet tree
00146         RankSupport                     m_tree_rank;    // rank support for the wavelet tree bit vector
00147         SelectSupport           m_tree_select1; // select support for the wavelet tree bit vector
00148         SelectSupportZero       m_tree_select0;
00149 
00150         _node<size_type>        m_nodes[511];    // nodes for the Huffman tree structure
00151         uint16_t                        m_c_to_leaf[256];// map symbol c to a leaf in the tree structure
00152         // if m_c_to_leaf[c] == _undef_node the char does
00153         // not exists in the text
00154         uint64_t                        m_path[256];     // path information for each char; the bits at position
00155         // 0..55 hold path information; bits 56..63 the length
00156         // of the path in binary representation
00157 //              wavelet_tree<>          m_check;
00158 
00159         typedef std::pair<size_type, size_type> tPII;  // pair (frequency, node_number) for constructing the Huffman tree
00160         typedef std::priority_queue<tPII, std::vector<tPII>, std::greater<tPII> >  tMPQPII; // minimum priority queue
00161 
00162         void copy(const wt_huff& wt) {
00163             m_size                      = wt.m_size;
00164             m_sigma             = wt.m_sigma;
00165             m_tree                      = wt.m_tree;
00166             m_tree_rank         = wt.m_tree_rank;
00167             m_tree_rank.set_vector(&m_tree);
00168             m_tree_select1      = wt.m_tree_select1;
00169             m_tree_select1.set_vector(&m_tree);
00170             m_tree_select0      = wt.m_tree_select0;
00171             m_tree_select0.set_vector(&m_tree);
00172             for (size_type i=0; i < 511; ++i)
00173                 m_nodes[i] = wt.m_nodes[i];
00174             for (size_type i=0; i<256; ++i)
00175                 m_c_to_leaf[i] = wt.m_c_to_leaf[i];
00176             for (size_type i=0; i<256; ++i) {
00177                 m_path[i] = wt.m_path[i];
00178             }
00179 //                      m_check = wt.m_check;
00180         }
00181 
00182         // insert a character into the wavelet tree, see constuct method
00183         void insert_char(uint8_t old_chr, size_type* tree_pos, size_type times, bit_vector& f_tree) {
00184             uint32_t path_len = (m_path[old_chr]>>56);
00185             uint64_t p = m_path[old_chr];
00186             for (uint32_t node=0, l=0; l<path_len; ++l, p >>= 1) {
00187                 if (p&1) {
00188                     f_tree.set_int(tree_pos[node], 0xFFFFFFFFFFFFFFFFULL,times);
00189                 }
00190                 tree_pos[node] += times;
00191                 node = m_nodes[node].child[p&1];
00192             }
00193         }
00194 
00195         // calculates the Huffman tree and returns the size of the WT bit vector
00196         size_type construct_huffman_tree(size_type* C) {
00197             tMPQPII pq; // priority queue
00198             std::vector<_node<size_type> > temp_nodes(2*m_sigma-1);  // vector for nodes of the Huffman tree
00199             size_type node_cnt=0;                                                               // counter for the nodes
00200             for (size_type i=0; i < 256; ++i) // add leafs of Huffman tree
00201                 if (C[i] > 0) {
00202                     pq.push(tPII(C[i], node_cnt));   // push (frequency, pointer to node)
00203                     temp_nodes[node_cnt++] = _node<size_type>(C[i], i);   // initial tree_pos with number of occurences
00204                     // and tree_pos_rank value with the code of the corresponding char
00205                     // parent, child[0], and child[1] are set to _undef_node
00206                 }
00207             while (pq.size() > 1) {
00208                 tPII v1, v2;
00209                 v1 = pq.top(); pq.pop();
00210                 v2 = pq.top(); pq.pop();
00211                 temp_nodes[ v1.second ].parent = node_cnt; // parent is new node
00212                 temp_nodes[ v2.second ].parent = node_cnt; // parent is new node
00213                 size_type frq_sum = v1.first + v2.first;
00214                 pq.push(tPII(frq_sum, node_cnt));   // push new node to the priority queue
00215                 temp_nodes[ node_cnt++ ] = _node<size_type>(frq_sum, 0, _undef_node, v1.second, v2.second);
00216             }
00217             // Convert Huffman tree into breadth first search order in memory and
00218             // calculate tree_pos values
00219             m_nodes[0] = temp_nodes[node_cnt-1];  // insert root at index 0
00220             size_type tree_size = 0;
00221             node_cnt = 1;
00222             uint16_t last_parent = _undef_node;
00223             std::deque<size_type> q;
00224             q.push_back(0);
00225             while (!q.empty()) {
00226                 size_type idx;
00227                 if (!dfs_shape) {
00228                     idx = q.front(); q.pop_front();
00229                 } else {
00230                     idx = q.back(); q.pop_back();
00231                 }
00232                 size_type frq = m_nodes[idx].tree_pos; // frq_sum was stored in tree_pos
00233                 m_nodes[idx].tree_pos = tree_size;
00234                 if (m_nodes[idx].child[0] != _undef_node)  // if node is not a leaf
00235                     tree_size += frq;                                      // add frequency, as leaves have size 0
00236                 if (idx > 0) { // node is not the root
00237                     if (last_parent != m_nodes[idx].parent)
00238                         m_nodes[m_nodes[idx].parent].child[0] = idx;
00239                     else
00240                         m_nodes[m_nodes[idx].parent].child[1] = idx;
00241                     last_parent = m_nodes[idx].parent;
00242                 }
00243                 if (m_nodes[idx].child[0] != _undef_node) { // if node is not a leaf
00244                     for (size_type k=0; k<2; ++k) {                     // add children to tree
00245                         m_nodes[node_cnt] = temp_nodes[ m_nodes[idx].child[k] ];
00246                         m_nodes[node_cnt].parent = idx;
00247                         q.push_back(node_cnt);
00248                         m_nodes[idx].child[k] = node_cnt++;
00249                     }
00250                 }
00251             }
00252 
00253             // initialize m_c_to_leaf
00254             for (size_type i=0; i<256; ++i)
00255                 m_c_to_leaf[i] = _undef_node; // if c is not in the alphabet m_c_to_leaf[c] = _undef_node
00256             for (size_type i=0; i < 2*sigma-1; ++i) {
00257                 if (m_nodes[i].child[0] == _undef_node)                                 // if node is a leaf
00258                     m_c_to_leaf[(uint8_t)m_nodes[i].tree_pos_rank] = i; // calculate value
00259             }
00260             // initialize path information
00261             // Note: In the case of a bfs search order,
00262             // we can classify nodes as rigth child and left child with an easy criterion:
00263             //       node is a left child, if node%2==1
00264             //       node is a rigth child, if node%2==0
00265             for (size_type c=0; c<256; ++c) {
00266                 if (m_c_to_leaf[c] != _undef_node) { // if char exists in the alphabet
00267                     size_type node = m_c_to_leaf[c];
00268                     uint64_t w = 0; // path
00269                     uint64_t l = 0; // path len
00270                     while (node != 0) { // while node is not the root
00271                         w <<= 1;
00272 //                                              if( (node & 1) == 0 )// if the node is a right child
00273 //                                                      w |= 1ULL;
00274                         if (m_nodes[m_nodes[node].parent].child[1] == node)
00275                             w |= 1ULL;
00276                         ++l;
00277                         node = m_nodes[node].parent; // go up the tree
00278                     }
00279                     if (l > 56) {
00280                         std::cerr<<"Huffman tree has max depth > 56!!! ERROR"<<std::endl;
00281                         throw std::logic_error("Huffman tree size is greater than 56!!!");
00282                     }
00283                     m_path[c] = w | (l << 56);
00284                 } else {
00285                     m_path[c] = 0; // i.e. len is also 0, good for special case in rank()
00286                 }
00287             }
00288             return tree_size;
00289         }
00290 
00291         void construct_init_rank_select() {
00292             util::init_support(m_tree_rank, &m_tree);
00293             util::init_support(m_tree_select0, &m_tree);
00294             util::init_support(m_tree_select1, &m_tree);
00295         }
00296 
00297         void construct_precalc_node_ranks() {
00298             for (size_type i=0; i<2*m_sigma-1; ++i) {
00299                 if (m_nodes[i].child[0] != _undef_node)  // if node is not a leaf
00300                     m_nodes[i].tree_pos_rank = m_tree_rank(m_nodes[i].tree_pos);
00301             }
00302         }
00303 
00304 
00305         // recursive internal version of the method interval_symbols
00306         void _interval_symbols(size_type i, size_type j, size_type& k,
00307                                std::vector<unsigned char>& cs,
00308                                std::vector<size_type>& rank_c_i,
00309                                std::vector<size_type>& rank_c_j, uint16_t node) const {
00310             // invariant: j>i
00311             // goto right child
00312             size_type i_new = (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank);
00313             size_type j_new = (m_tree_rank(m_nodes[node].tree_pos + j) - m_nodes[node].tree_pos_rank);
00314             if (i_new!=j_new) {
00315                 uint16_t node_new = m_nodes[node].child[1];
00316                 // if node is not a leaf
00317                 if (m_nodes[node_new].child[0] != _undef_node) {
00318                     _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, node_new);
00319                 } else {
00320                     rank_c_i[k] = i_new;
00321                     rank_c_j[k] = j_new;
00322                     cs[k++] = m_nodes[node_new].tree_pos_rank;
00323                 }
00324             }
00325             // goto left child
00326             i -= i_new; j -= j_new;
00327             if (i != j) {
00328                 uint16_t node_new = m_nodes[node].child[0];
00329                 // if node is not a leaf
00330                 if (m_nodes[node_new].child[0] != _undef_node) {
00331                     _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, node_new);
00332                 } else {
00333                     rank_c_i[k] = i;
00334                     rank_c_j[k] = j;
00335                     cs[k++] = m_nodes[node_new].tree_pos_rank;
00336                 }
00337             }
00338         }
00339 
00340 
00341     public:
00342 
00343         const size_type& sigma;
00344         const bit_vector_type& tree;
00345 
00346         // Default constructor
00347         wt_huff():m_size(0),m_sigma(0), sigma(m_sigma),tree(m_tree) {};
00348 
00349 
00350 
00352 
00358         template<typename RandomAccessContainer>
00359         wt_huff(const RandomAccessContainer& rac, size_type size):m_size(size), m_sigma(0), sigma(m_sigma), tree(m_tree) {
00360             construct(rac, size);
00361         }
00362 
00363         template<uint8_t w>
00364         wt_huff(const int_vector<w>& rac):m_size(rac.size()), m_sigma(0), sigma(m_sigma), tree(m_tree) {
00365             construct(rac, rac.size());
00366         }
00367 
00368 
00369         template<typename RandomAccessContainer>
00370         void construct(const RandomAccessContainer& rac, size_type size) {
00371             m_size = size;
00372             if (m_size == 0)
00373                 return;
00374             // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes
00375             size_type C[256] = {0};
00376             //  1. Count occurences of characters
00377             for (size_type i=0; i < size; ++i) {
00378                 ++C[rac[i]];
00379             }
00380             // 2. Calculate effective alphabet size
00381             calculate_effective_alphabet_size(C, m_sigma);
00382             // 3. Generate Huffman tree
00383             size_type tree_size = construct_huffman_tree(C);
00384             // 4. Generate wavelet tree bit sequence m_tree
00385 
00386             bit_vector tmp_tree(tree_size, 0);  // initialize bit_vector for the tree
00387             //  Calculate starting position of wavelet tree nodes
00388             size_type tree_pos[511];
00389             for (size_type i=0; i < 2*sigma-1; ++i) {
00390                 tree_pos[i] = m_nodes[i].tree_pos;
00391             }
00392             uint8_t old_chr = rac[0], times = 0;
00393             for (size_type i=0; i < m_size; ++i) {
00394                 uint8_t chr = rac[i];
00395                 if (chr != old_chr) {
00396                     insert_char(old_chr, tree_pos, times, tmp_tree);
00397                     times = 1;
00398                     old_chr = chr;
00399                 } else { // chr == old_chr
00400                     ++times;
00401                     if (times == 64) {
00402                         insert_char(old_chr, tree_pos, times, tmp_tree);
00403                         times = 0;
00404                     }
00405                 }
00406             }
00407             if (times > 0) {
00408                 insert_char(old_chr, tree_pos, times, tmp_tree);
00409             }
00410             util::assign(m_tree, tmp_tree);
00411             // 5. Initialize rank and select data structures for m_tree
00412             construct_init_rank_select();
00413             // 6. Finish inner nodes by precalculating the tree_pos_rank values
00414             construct_precalc_node_ranks();
00415         }
00416 
00417         template<class size_type_class>
00418         wt_huff(int_vector_file_buffer<8, size_type_class>& rac, size_type size):m_size(size), m_sigma(0), sigma(m_sigma), tree(m_tree) {
00419             construct(rac, size);
00420         }
00421 
00423 
00426         template<class size_type_class>
00427         void construct(int_vector_file_buffer<8, size_type_class>& rac, size_type size) {
00428 //              m_check.construct(rac, size);
00429             m_size = size;
00430             if (m_size == 0)
00431                 return;
00432             // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes
00433             size_type C[256] = {0};
00434             // 1. Count occurrences of characters
00435             calculate_character_occurences(rac, m_size, C);
00436             // 2. Calculate effective alphabet size
00437             calculate_effective_alphabet_size(C, m_sigma);
00438             // 3. Generate Huffman tree
00439             size_type tree_size = construct_huffman_tree(C);
00440             // 4. Generate wavelet tree bit sequence m_tree
00441 
00442             bit_vector tmp_tree(tree_size, 0);  // initialize bit_vector for the tree
00443             //  Calculate starting position of wavelet tree nodes
00444             size_type tree_pos[511];
00445             for (size_type i=0; i < 2*sigma-1; ++i) {
00446                 tree_pos[i] = m_nodes[i].tree_pos;
00447             }
00448             rac.reset();
00449             if (rac.int_vector_size < size) {
00450                 throw std::logic_error("wt_huff::construct: stream size is smaller than size!");
00451                 return;
00452             }
00453             for (size_type i=0, r_sum=0, r = rac.load_next_block(); r_sum < m_size;) {
00454                 if (r_sum + r > size) {  // read not more than size chars in the next loop
00455                     r = size-r_sum;
00456                 }
00457                 uint8_t old_chr = rac[i-r_sum], times = 0;
00458                 for (; i < r_sum+r; ++i) {
00459                     uint8_t chr = rac[i-r_sum];
00460                     if (chr     != old_chr) {
00461                         insert_char(old_chr, tree_pos, times, tmp_tree);
00462                         times = 1;
00463                         old_chr = chr;
00464                     } else { // chr == old_chr
00465                         ++times;
00466                         if (times == 64) {
00467                             insert_char(old_chr, tree_pos, times, tmp_tree);
00468                             times = 0;
00469                         }
00470                     }
00471                 }
00472                 if (times > 0) {
00473                     insert_char(old_chr, tree_pos, times, tmp_tree);
00474                 }
00475                 r_sum += r; r = rac.load_next_block();
00476             }
00477             util::assign(m_tree, tmp_tree);
00478             // 5. Initialize rank and select data structures for m_tree
00479             construct_init_rank_select();
00480             // 6. Finish inner nodes by precalculating the tree_pos_rank values
00481             construct_precalc_node_ranks();
00482         }
00483 
00484 
00486         wt_huff(const wt_huff& wt):sigma(m_sigma), tree(m_tree) {
00487             copy(wt);
00488         }
00489 
00491         wt_huff& operator=(const wt_huff& wt) {
00492             if (this != &wt) {
00493                 copy(wt);
00494             }
00495             return *this;
00496         }
00497 
00499         void swap(wt_huff& wt) {
00500             if (this != &wt) {
00501                 std::swap(m_size, wt.m_size);
00502                 std::swap(m_sigma,  wt.m_sigma);
00503                 m_tree.swap(wt.m_tree);
00504                 util::swap_support(m_tree_rank, wt.m_tree_rank, &m_tree, &(wt.m_tree));
00505 
00506                 util::swap_support(m_tree_select1, wt.m_tree_select1, &m_tree, &(wt.m_tree));
00507                 util::swap_support(m_tree_select0, wt.m_tree_select0, &m_tree, &(wt.m_tree));
00508 
00509                 for (size_type i=0; i < 511; ++i)
00510                     std::swap(m_nodes[i], wt.m_nodes[i]);
00511                 for (size_type i=0; i<256; ++i)
00512                     std::swap(m_c_to_leaf[i], wt.m_c_to_leaf[i]);
00513                 for (size_type i=0; i<256; ++i)
00514                     std::swap(m_path[i], wt.m_path[i]);
00515 
00516 //                      m_check.swap( wt.m_check );
00517             }
00518         }
00519 
00521         size_type size()const {
00522             return m_size;
00523         }
00524 
00526         bool empty()const {
00527             return m_size == 0;
00528         }
00529 
00531 
00537         value_type operator[](size_type i)const { // TODO: Maybe it is good to integrate a cache here
00538             // which stores how many of the next symbols are equal
00539             // with the current char
00540             size_type node = 0; // start at root node
00541             while (m_nodes[node].child[0] != _undef_node) { // while node is not a leaf
00542                 if (m_tree[ m_nodes[node].tree_pos + i]) {  // goto the right child
00543                     i = m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank;
00544                     node = m_nodes[node].child[1];
00545                 } else { // goto the left child
00546                     i -= (m_tree_rank(m_nodes[node].tree_pos + i) - m_nodes[node].tree_pos_rank);
00547                     node = m_nodes[node].child[0];
00548                 }
00549             }
00550             return m_nodes[node].tree_pos_rank;
00551         };
00552 
00554 
00561         size_type rank(size_type i, value_type c)const {
00562             uint64_t p = m_path[c];
00563             uint32_t path_len = (m_path[c]>>56); // equals zero if char was not present in the original text or m_sigma=1
00564             if (!path_len and 1 == m_sigma) {    // if m_sigma == 1 return result immediately
00565                 if (m_c_to_leaf[c] == _undef_node) { // if character does not exist return 0
00566                     return 0;
00567                 }
00568                 return std::min(i, m_size);
00569             }
00570             size_type result = i & ZoO[path_len>0]; // important: result has type size_type and ZoO has type size_type
00571             uint32_t node=0;
00572             for (uint32_t l=0; l<path_len and result; ++l, p >>= 1) {
00573 //                      result = (ZoO[1-(p&1)]&result) - (ZoO[1-(p&1)] ^ (m_tree_rank(m_nodes[node].tree_pos + result) - m_nodes[node].tree_pos_rank) );
00574                 if (p&1) {
00575                     result      = (m_tree_rank(m_nodes[node].tree_pos+result) -  m_nodes[node].tree_pos_rank);
00576                 } else {
00577                     result -= (m_tree_rank(m_nodes[node].tree_pos+result) -  m_nodes[node].tree_pos_rank);
00578                 }
00579                 node = m_nodes[node].child[p&1]; // goto child
00580             }
00581             return result;
00582         };
00583 
00585 
00592         size_type rank_ith_symbol(size_type i, value_type& c)const {
00593             // TODO: handle m_sigma=1
00594             assert(i>=0 and i < size());
00595             uint32_t node=0;
00596             while (m_nodes[node].child[0] != _undef_node) { // while node is not a leaf
00597                 if (m_tree[m_nodes[node].tree_pos + i]) { // if bit is set at position goto right child
00598                     i   = (m_tree_rank(m_nodes[node].tree_pos + i) -  m_nodes[node].tree_pos_rank);
00599                     node = m_nodes[node].child[1];
00600                 } else { // goto left child
00601                     i -= (m_tree_rank(m_nodes[node].tree_pos + i) -  m_nodes[node].tree_pos_rank);
00602                     node = m_nodes[node].child[0];
00603                 }
00604             }
00605             c = m_nodes[node].tree_pos_rank;
00606             return i;
00607         }
00608 
00610 
00616         size_type select(size_type i, value_type c)const {
00617             uint16_t node = m_c_to_leaf[c];
00618             if (node == _undef_node) { // if c was not present in the original text
00619                 return m_size;             // -> return a position right to the end
00620             }
00621             if (m_sigma == 1) {
00622                 return std::min(i-1,m_size);
00623             }
00624             /*          if( i == 0 ){
00625                                 std::cerr<<"WARNING: i=0"<<std::endl;
00626                                 return m_size;
00627                         }
00628                         if( i > rank(size(), c) ){
00629                                 std::cerr<<"WARNING: i="<<i<<" > rank(size(),"<<c<<")="<<rank(size(),c)<<std::endl;
00630                                 return m_size;
00631                         }
00632             */
00633             size_type result = i-1;             // otherwise
00634             uint64_t p = m_path[c];
00635             uint32_t path_len = (p>>56);
00636             p <<= (64-path_len); // Note: path_len > 0, since we have handled m_sigma = 1.
00637             for (uint32_t l=0; l<path_len; ++l, p <<= 1) {
00638 //                      if( node & 1 ){ // node was a left child, in the case of bfs order
00639                 if ((p & 0x8000000000000000ULL)==0) { // node was a left child
00640                     node = m_nodes[node].parent;
00641                     result = m_tree_select0(m_nodes[node].tree_pos-m_nodes[node].tree_pos_rank + result + 1)
00642                              - m_nodes[node].tree_pos;
00643                 } else { // node was a right child
00644                     node = m_nodes[node].parent;
00645                     result = m_tree_select1(m_nodes[node].tree_pos_rank + result + 1)
00646                              - m_nodes[node].tree_pos;
00647                 }
00648             }
00649             /*          size_type r2 = m_check.select(i, c);
00650                         if( r2 != result ){
00651                                 std::cerr<<"ERROR select r="<<result<<" != "<<r2<<"=r2 for input ("<<i<<","<<c<<")"<<std::endl;
00652                                 return r2;
00653                         }
00654             */
00655             return result;
00656         };
00657 
00658 
00660 
00676         void interval_symbols(size_type i, size_type j, size_type& k,
00677                               std::vector<unsigned char>& cs,
00678                               std::vector<size_type>& rank_c_i,
00679                               std::vector<size_type>& rank_c_j) const {
00680             if (i==j) {
00681                 k = 0;
00682                 return;
00683             } else if ((j-i)==1) {
00684                 k = 1;
00685                 rank_c_i[0] = rank_ith_symbol(i, cs[0]);
00686                 rank_c_j[0] = rank_c_i[0]+1;
00687                 return;
00688             } else if ((j-i)==2) {
00689                 rank_c_i[0] = rank_ith_symbol(i, cs[0]);
00690                 rank_c_i[1] = rank_ith_symbol(i+1, cs[1]);
00691                 if (cs[0]==cs[1]) {
00692                     k = 1;
00693                     rank_c_j[0] = rank_c_i[0]+2;
00694                     return;
00695                 } else {
00696                     k = 2;
00697                     rank_c_j[0] = rank_c_i[0]+1;
00698                     rank_c_j[1] = rank_c_i[1]+1;
00699                     return;
00700                 }
00701             } else {
00702                 k = 0;
00703                 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
00704             }
00705         }
00706 
00707 
00708 
00710         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00711             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00712             size_type written_bytes = 0;
00713             written_bytes += util::write_member(m_size, out, child, "size");
00714             written_bytes += util::write_member(m_sigma, out, child, "sigma");
00715             written_bytes += m_tree.serialize(out, child, "tree");
00716             written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
00717             written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
00718             written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
00719             for (size_type i=0; i < 511; ++i) {
00720                 written_bytes += m_nodes[i].serialize(out);
00721             }
00722             out.write((char*) m_c_to_leaf, 256*sizeof(m_c_to_leaf[0]));
00723             written_bytes += 256*sizeof(m_c_to_leaf[0]); // add written bytes from previous loop
00724             out.write((char*) m_path, 256*sizeof(m_path[0]));
00725             written_bytes += 256*sizeof(m_path[0]); // add written bytes from previous loop
00726             structure_tree::add_size(child, written_bytes);
00727             return written_bytes;
00728         }
00729 
00731         void load(std::istream& in) {
00732             util::read_member(m_size, in);
00733             util::read_member(m_sigma, in);
00734             m_tree.load(in);
00735             m_tree_rank.load(in, &m_tree);
00736             m_tree_select1.load(in, &m_tree);
00737             m_tree_select0.load(in, &m_tree);
00738             for (size_type i=0; i < 511; ++i) {
00739                 m_nodes[i].load(in);
00740             }
00741             in.read((char*) m_c_to_leaf, 256*sizeof(m_c_to_leaf[0]));
00742             in.read((char*) m_path, 256*sizeof(m_path[0]));
00743         }
00744 
00745 };
00746 
00747 typedef wt_huff<rrr_vector<>,
00748         rrr_vector<>::rank_1_type,
00749         rrr_vector<>::select_1_type,
00750         rrr_vector<>::select_0_type, 0> wt_huff_rrr;
00751 
00752 }// end namespace sdsl
00753 
00754 #endif // end file