SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY 00022 #define INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY 00023 00024 #include "bitmagic.hpp" 00025 #include "int_vector.hpp" 00026 #include "rank_support.hpp" 00027 #include "util.hpp" 00028 #include <stdexcept> 00029 #include <string> 00030 00032 namespace sdsl 00033 { 00034 00036 00047 // TODO: implement an iterator for the ones in the nearest neighbour dictionary!!! used in the construction of the balanced parentheses support 00048 template<uint8_t sample_dens> 00049 class nearest_neighbour_dictionary 00050 { 00051 public: 00052 typedef bit_vector::size_type size_type; 00053 private: 00054 int_vector<> m_abs_samples; // absolute samples array corresponds to array \f$ A_1 \f$ in the paper 00055 int_vector<> m_differences; // vector for the differences in between the samples; corresponds to array \f$ A_2 \f$ in the paper 00056 size_type m_ones; // corresponds to N in the paper 00057 size_type m_size; // corresponds to M in the paper 00058 bit_vector m_contains_abs_sample; // vector which stores for every block of length sample_dens of the original bit_vector if an absolute sample lies in this block. 00059 // Corresponds to array \f$ A_3 \f$ in the paper. 00060 rank_support_v<> m_rank_contains_abs_sample; // rank support for m_contains_abs_sample. Corresponds to array \f$ A_4 \f$ in the paper. 00061 // NOTE: A faster version should store the absolute samples and the differences interleaved 00062 00063 void copy(const nearest_neighbour_dictionary& nnd) { 00064 // copy all members of the data structure 00065 m_abs_samples = nnd.m_abs_samples; 00066 m_differences = nnd.m_differences; 00067 m_ones = nnd.m_ones; 00068 m_size = nnd.m_size; 00069 m_contains_abs_sample = nnd.m_contains_abs_sample; 00070 m_rank_contains_abs_sample = nnd.m_rank_contains_abs_sample; 00071 m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample); 00072 } 00073 00074 public: 00075 00077 nearest_neighbour_dictionary():m_ones(0),m_size(0) { 00078 } 00079 00081 00083 nearest_neighbour_dictionary(const bit_vector& v):m_ones(0), m_size(0) { 00084 if (sample_dens==0) { // first logical error check 00085 throw std::logic_error(util::demangle(typeid(this).name())+": sample_dens should not be equal 0!"); 00086 } 00087 size_type max_distance_between_two_ones = 0; 00088 size_type ones = 0; // counter for the ones in v 00089 00090 // get maximal distance between to ones in the bit vector 00091 // speed this up by broadword computing 00092 for (size_type i=0, last_one_pos_plus_1=0; i < v.size(); ++i) { 00093 if (v[i]) { 00094 if (i+1-last_one_pos_plus_1 > max_distance_between_two_ones) 00095 max_distance_between_two_ones = i+1-last_one_pos_plus_1; 00096 last_one_pos_plus_1 = i+1; 00097 ++ones; 00098 00099 } 00100 } 00101 m_ones = ones; 00102 m_size = v.size(); 00103 // std::cerr<<ones<<std::endl; 00104 // initialize absolute samples m_abs_samples[0]=0 00105 m_abs_samples = int_vector<>(m_ones/sample_dens + 1, 0, bit_magic::l1BP(v.size())+1); 00106 // initialize different values 00107 m_differences = int_vector<>(m_ones - m_ones/sample_dens, 0, bit_magic::l1BP(max_distance_between_two_ones)+1); 00108 // initialize m_contains_abs_sample 00109 m_contains_abs_sample = bit_vector((v.size()+sample_dens-1)/sample_dens, 0); 00110 ones = 0; 00111 for (size_type i=0, last_one_pos=0; i < v.size(); ++i) { 00112 if (v[i]) { 00113 ++ones; 00114 if ((ones % sample_dens) == 0) { // insert absolute samples 00115 m_abs_samples[ones/sample_dens] = i; 00116 m_contains_abs_sample[i/sample_dens] = 1; 00117 } else { 00118 m_differences[ones - ones/sample_dens - 1] = i - last_one_pos; 00119 } 00120 last_one_pos = i; 00121 } 00122 } 00123 util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample); 00124 } 00125 00127 nearest_neighbour_dictionary(const nearest_neighbour_dictionary& nnd) { 00128 // copy all members of the data structure 00129 copy(nnd); 00130 } 00131 00133 ~nearest_neighbour_dictionary() {} 00134 00135 nearest_neighbour_dictionary& operator=(const nearest_neighbour_dictionary& nnd) { 00136 if (this != &nnd) { 00137 copy(nnd); 00138 } 00139 return *this; 00140 } 00141 00142 void swap(nearest_neighbour_dictionary& nnd) { 00143 // copy all members of the data structure 00144 m_abs_samples.swap(nnd.m_abs_samples); 00145 m_differences.swap(nnd.m_differences); 00146 std::swap(m_ones, nnd.m_ones); 00147 std::swap(m_size, nnd.m_size); 00148 m_contains_abs_sample.swap(nnd.m_contains_abs_sample); 00149 util::swap_support(m_rank_contains_abs_sample, nnd.m_rank_contains_abs_sample, 00150 &m_contains_abs_sample, &(nnd.m_contains_abs_sample)); 00151 } 00152 00154 00158 size_type rank(size_type idx)const { 00159 assert(idx <= m_size); 00160 size_type r = m_rank_contains_abs_sample.rank(idx/sample_dens); // 00161 size_type result = r*sample_dens; 00162 size_type i = m_abs_samples[r]; 00163 while (++result <= m_ones) { 00164 if ((result % sample_dens) == 0) { 00165 i = m_abs_samples[result/sample_dens]; 00166 } else { 00167 i = i+m_differences[result - result/sample_dens-1]; 00168 } 00169 if (i >= idx) 00170 return result-1; 00171 } 00172 return result-1; 00173 }; 00174 00176 00180 size_type select(size_type i)const { 00181 assert(i > 0 and i <= m_ones); 00182 size_type j = i/sample_dens; 00183 size_type result = m_abs_samples[j]; 00184 j = j*sample_dens - j; 00185 for (size_type end = j + (i%sample_dens); j < end; ++j) { 00186 result += m_differences[j]; 00187 } 00188 return result; 00189 } 00190 00192 00197 size_type prev(size_type i)const { 00198 size_type r = rank(i+1); 00199 assert(r>0); 00200 return select(r); 00201 } 00208 size_type next(size_type i)const { 00209 size_type r = rank(i); 00210 assert(r < m_ones); 00211 return select(r+1); 00212 } 00213 00214 size_type size()const { 00215 return m_size; 00216 } 00217 00218 size_type ones()const { 00219 return m_ones; 00220 } 00221 00223 00225 size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const { 00226 size_type written_bytes = 0; 00227 structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 00228 written_bytes += m_abs_samples.serialize(out, child, "absolute_samples"); 00229 written_bytes += m_differences.serialize(out, child, "differences"); 00230 written_bytes += util::write_member(m_ones, out, child, "ones"); 00231 written_bytes += util::write_member(m_size,out, child, "size"); 00232 written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample"); 00233 written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample"); 00234 structure_tree::add_size(child, written_bytes); 00235 return written_bytes; 00236 } 00237 00239 00241 void load(std::istream& in) { 00242 m_abs_samples.load(in); 00243 m_differences.load(in); 00244 util::read_member(m_ones, in); 00245 util::read_member(m_size, in); 00246 m_contains_abs_sample.load(in); 00247 m_rank_contains_abs_sample.load(in, &m_contains_abs_sample); 00248 } 00249 }; 00250 00251 00252 }// end namespace sdsl 00253 00254 00255 #endif // end file