SDSL: Succinct Data Structure Library: sdsl/include/sdsl/nearest_neighbour

Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
00022 #define INCLUDED_SDSL_NEAREST_NEIGHBOUR_DICTIONARY
00023 
00024 #include "bitmagic.hpp"
00025 #include "int_vector.hpp"
00026 #include "rank_support.hpp"
00027 #include "util.hpp"
00028 #include <stdexcept>
00029 #include <string>
00030 
00032 namespace sdsl
00033 {
00034 
00036 
00047 // TODO: implement an iterator for the ones in the nearest neighbour dictionary!!! used in the construction of the balanced parentheses support
00048 template<uint8_t sample_dens>
00049 class nearest_neighbour_dictionary
00050 {
00051     public:
00052         typedef bit_vector::size_type size_type;
00053     private:
00054         int_vector<> m_abs_samples; // absolute samples array corresponds to array \f$ A_1 \f$ in the paper
00055         int_vector<> m_differences; // vector for the differences in between the samples; corresponds to array \f$ A_2 \f$ in the paper
00056         size_type    m_ones; // corresponds to N in the paper
00057         size_type    m_size; // corresponds to M in the paper
00058         bit_vector   m_contains_abs_sample; // vector which stores for every block of length sample_dens of the original bit_vector if an absolute sample lies in this block.
00059         // Corresponds to array \f$ A_3 \f$ in the paper.
00060         rank_support_v<>  m_rank_contains_abs_sample; // rank support for m_contains_abs_sample. Corresponds to array \f$ A_4 \f$ in the paper.
00061         // NOTE: A faster version should store the absolute samples and the differences interleaved
00062 
00063         void copy(const nearest_neighbour_dictionary& nnd) {
00064             // copy all members of the data structure
00065             m_abs_samples       =               nnd.m_abs_samples;
00066             m_differences       =               nnd.m_differences;
00067             m_ones                      =               nnd.m_ones;
00068             m_size                      =               nnd.m_size;
00069             m_contains_abs_sample =     nnd.m_contains_abs_sample;
00070             m_rank_contains_abs_sample = nnd.m_rank_contains_abs_sample;
00071             m_rank_contains_abs_sample.set_vector(&m_contains_abs_sample);
00072         }
00073 
00074     public:
00075 
00077         nearest_neighbour_dictionary():m_ones(0),m_size(0) {
00078         }
00079 
00081 
00083         nearest_neighbour_dictionary(const bit_vector& v):m_ones(0), m_size(0) {
00084             if (sample_dens==0) { // first logical error check
00085                 throw std::logic_error(util::demangle(typeid(this).name())+": sample_dens should not be equal 0!");
00086             }
00087             size_type max_distance_between_two_ones = 0;
00088             size_type ones = 0; // counter for the ones in v
00089 
00090             // get maximal distance between to ones in the bit vector
00091             // speed this up by broadword computing
00092             for (size_type i=0, last_one_pos_plus_1=0; i < v.size(); ++i) {
00093                 if (v[i]) {
00094                     if (i+1-last_one_pos_plus_1 > max_distance_between_two_ones)
00095                         max_distance_between_two_ones = i+1-last_one_pos_plus_1;
00096                     last_one_pos_plus_1 = i+1;
00097                     ++ones;
00098 
00099                 }
00100             }
00101             m_ones = ones;
00102             m_size = v.size();
00103 //                      std::cerr<<ones<<std::endl;
00104             // initialize absolute samples m_abs_samples[0]=0
00105             m_abs_samples = int_vector<>(m_ones/sample_dens + 1, 0,  bit_magic::l1BP(v.size())+1);
00106             // initialize different values
00107             m_differences = int_vector<>(m_ones - m_ones/sample_dens, 0, bit_magic::l1BP(max_distance_between_two_ones)+1);
00108             // initialize m_contains_abs_sample
00109             m_contains_abs_sample = bit_vector((v.size()+sample_dens-1)/sample_dens, 0);
00110             ones = 0;
00111             for (size_type i=0, last_one_pos=0; i < v.size(); ++i) {
00112                 if (v[i]) {
00113                     ++ones;
00114                     if ((ones % sample_dens) == 0) {  // insert absolute samples
00115                         m_abs_samples[ones/sample_dens] = i;
00116                         m_contains_abs_sample[i/sample_dens] = 1;
00117                     } else {
00118                         m_differences[ones - ones/sample_dens - 1] = i - last_one_pos;
00119                     }
00120                     last_one_pos = i;
00121                 }
00122             }
00123             util::init_support(m_rank_contains_abs_sample, &m_contains_abs_sample);
00124         }
00125 
00127         nearest_neighbour_dictionary(const nearest_neighbour_dictionary& nnd) {
00128             // copy all members of the data structure
00129             copy(nnd);
00130         }
00131 
00133         ~nearest_neighbour_dictionary() {}
00134 
00135         nearest_neighbour_dictionary& operator=(const nearest_neighbour_dictionary& nnd) {
00136             if (this != &nnd) {
00137                 copy(nnd);
00138             }
00139             return *this;
00140         }
00141 
00142         void swap(nearest_neighbour_dictionary& nnd) {
00143             // copy all members of the data structure
00144             m_abs_samples.swap(nnd.m_abs_samples);
00145             m_differences.swap(nnd.m_differences);
00146             std::swap(m_ones, nnd.m_ones);
00147             std::swap(m_size, nnd.m_size);
00148             m_contains_abs_sample.swap(nnd.m_contains_abs_sample);
00149             util::swap_support(m_rank_contains_abs_sample, nnd.m_rank_contains_abs_sample,
00150                                &m_contains_abs_sample, &(nnd.m_contains_abs_sample));
00151         }
00152 
00154 
00158         size_type rank(size_type idx)const {
00159             assert(idx <= m_size);
00160             size_type r = m_rank_contains_abs_sample.rank(idx/sample_dens); //
00161             size_type result = r*sample_dens;
00162             size_type i = m_abs_samples[r];
00163             while (++result <= m_ones) {
00164                 if ((result % sample_dens) == 0) {
00165                     i = m_abs_samples[result/sample_dens];
00166                 } else {
00167                     i = i+m_differences[result - result/sample_dens-1];
00168                 }
00169                 if (i >= idx)
00170                     return result-1;
00171             }
00172             return result-1;
00173         };
00174 
00176 
00180         size_type select(size_type i)const {
00181             assert(i > 0 and i <= m_ones);
00182             size_type j = i/sample_dens;
00183             size_type result = m_abs_samples[j];
00184             j = j*sample_dens - j;
00185             for (size_type end = j + (i%sample_dens); j < end; ++j) {
00186                 result += m_differences[j];
00187             }
00188             return result;
00189         }
00190 
00192 
00197         size_type prev(size_type i)const {
00198             size_type r = rank(i+1);
00199             assert(r>0);
00200             return select(r);
00201         }
00208         size_type next(size_type i)const {
00209             size_type r = rank(i);
00210             assert(r < m_ones);
00211             return select(r+1);
00212         }
00213 
00214         size_type size()const {
00215             return m_size;
00216         }
00217 
00218         size_type ones()const {
00219             return m_ones;
00220         }
00221 
00223 
00225         size_type serialize(std::ostream& out, structure_tree_node* v=NULL, std::string name="")const {
00226             size_type written_bytes = 0;
00227             structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
00228             written_bytes += m_abs_samples.serialize(out, child, "absolute_samples");
00229             written_bytes += m_differences.serialize(out, child, "differences");
00230                         written_bytes += util::write_member(m_ones, out, child, "ones");
00231                         written_bytes += util::write_member(m_size,out,  child, "size");
00232             written_bytes += m_contains_abs_sample.serialize(out, child, "contains_abs_sample");
00233             written_bytes += m_rank_contains_abs_sample.serialize(out, child, "rank_contains_abs_sample");
00234             structure_tree::add_size(child, written_bytes);
00235             return written_bytes;
00236         }
00237 
00239 
00241         void load(std::istream& in) {
00242             m_abs_samples.load(in);
00243             m_differences.load(in);
00244                         util::read_member(m_ones, in);
00245                         util::read_member(m_size, in);
00246             m_contains_abs_sample.load(in);
00247             m_rank_contains_abs_sample.load(in, &m_contains_abs_sample);
00248         }
00249 };
00250 
00251 
00252 }// end namespace sdsl
00253 
00254 
00255 #endif // end file