SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/util.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2009 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_UTIL
00022 #define INCLUDED_SDSL_UTIL
00023 
00024 #include "bitmagic.hpp"
00025 #include "typedefs.hpp"
00026 #include "structure_tree.hpp"
00027 #include <iosfwd>      // forward declaration of ostream
00028 #include <stdint.h>    // for uint64_t uint32_t declaration
00029 #include <cassert>
00030 #include <fstream>     // file stream for storeToFile and loadFromFile
00031 #include <ctime>       // for rand initialization
00032 #include <string>
00033 #include <string.h>    // for strlen and strdup
00034 #include <libgen.h>    // for basename
00035 #include <cstdlib>
00036 #include <unistd.h>    // for getpid 
00037 #include <sstream>     // for to_string method
00038 #include <stdexcept>   // for std::logic_error
00039 #include <typeinfo>    // for typeid
00040 
00041 
00042 // macros to transform a defined name to a string
00043 #define SDSL_STR(x) #x
00044 #define SDSL_XSTR(s) SDSL_STR(s)
00045 
00047 namespace sdsl
00048 {
00049 
00050 //class structure_tree_node; // forward declaration of data structure in structure_tree.hpp
00051 //class structure_tree; // forward declaration of data structure in structure.hpp
00052 
00053 template<uint8_t, class size_type_class>
00054 class int_vector_file_buffer; // forward declaration
00055 
00056 template<uint8_t, class size_type_class>
00057 class int_vector;        // forward declaration
00058 
00060 namespace util
00061 {
00062 
00063 static bool verbose = false;
00064 
00065 void set_verbose();
00066 
00068 std::string basename(const std::string& file_name);
00069 
00071 std::string dirname(const std::string& file_name);
00072 
00074 
00079 template<class int_vector_type>
00080 void set_random_bits(int_vector_type& v, int seed=0);
00082 template<class int_vector_type>
00083 void set_zero_bits(int_vector_type& v);
00085 template<class int_vector_type>
00086 void set_one_bits(int_vector_type& v);
00087 
00089 
00093 template<class int_vector_type>
00094 void bit_compress(int_vector_type& v);
00095 
00097 template<class int_vector_type, class size_type_class>
00098 void all_elements_mod(int_vector_type& v, size_type_class m);
00099 
00100 
00102 
00108 template<class int_vector_type>
00109 void set_all_values_to_k(int_vector_type& v, uint64_t k);
00110 
00112 template<class int_vector_type>
00113 void set_to_id(int_vector_type& v);
00114 
00116 
00119 template<class int_vector_type>
00120 typename int_vector_type::size_type get_one_bits(const int_vector_type& v);
00121 
00123 
00125 template<class int_vector_type>
00126 typename int_vector_type::size_type get_onezero_bits(const int_vector_type& v);
00127 
00129 
00131 template <class int_vector_type>
00132 typename int_vector_type::size_type get_zeroone_bits(const int_vector_type& v);
00133 
00135 
00139 template<class T>
00140 bool load_from_file(T& v, const char* file_name);
00141 
00142 template<>
00143 bool load_from_file(void*&, const char* file_name);
00144 
00145 template<class size_type_class>
00146 bool load_from_int_vector_buffer(unsigned char*& text, int_vector_file_buffer<8, size_type_class>& text_buf);
00147 
00149 /*  \pre v=NULL
00150  *
00151  */
00152 bool load_from_file(char*& v, const char* file_name);
00153 
00155 
00160 template<class T>
00161 bool store_to_file(const T& v, const char* file_name);
00162 
00164 bool store_to_file(const char* v, const char* file_name);
00165 
00167 template<uint8_t fixed_int_width, class size_type_class>
00168 bool store_to_file(const int_vector<fixed_int_width, size_type_class>& v, const char* file_name, bool write_fixed_as_variable=false);
00169 
00171 
00174 std::string demangle(const char* name);
00175 
00177 std::string demangle2(const char* name);
00178 
00179 template<class T>
00180 std::string class_name(const T& t)
00181 {
00182     std::string result = demangle2(typeid(t).name());
00183     size_t template_pos = result.find("<");
00184     if (template_pos != std::string::npos) {
00185         result = result.erase(template_pos);
00186     }
00187     return result;
00188 }
00189 
00191 
00194 template<class T>
00195 typename T::size_type get_size_in_bytes(const T& t);
00196 
00198 
00201 template<class T>
00202 double get_size_in_mega_bytes(const T& t);
00203 
00204 struct nullstream : std::ostream {
00205     struct nullbuf: std::streambuf {
00206         int overflow(int c) {
00207             return traits_type::not_eof(c);
00208         }
00209     } m_sbuf;
00210     nullstream(): std::ios(&m_sbuf), std::ostream(&m_sbuf), m_sbuf() {}
00211 };
00212 
00213 // Writes primitive-typed variable t to stream out
00214 template<class T>
00215 size_t write_member(const T& t, std::ostream& out, sdsl::structure_tree_node* v=NULL, std::string name="")
00216 {
00217     sdsl::structure_tree_node* child = sdsl::structure_tree::add_child(v, name, util::class_name(t));
00218     out.write((char*)&t, sizeof(t));
00219     size_t written_bytes = sizeof(t);
00220     sdsl::structure_tree::add_size(child, written_bytes);
00221     return written_bytes;
00222 }
00223 
00224 // Specialization for std::string
00225 template<>
00226 size_t write_member<std::string>(const std::string& t, std::ostream& out, structure_tree_node* v, std::string name);
00227 
00228 // Writes primitive-typed variable t to stream out
00229 template<class T>
00230 void read_member(T& t, std::istream& in)
00231 {
00232     in.read((char*)&t, sizeof(t));
00233 }
00234 
00235 // Specialization for std::string
00236 template<>
00237 void read_member<std::string>(std::string& t, std::istream& in);
00238 
00240 uint64_t get_pid();
00241 
00242 class _id_helper
00243 {
00244     private:
00245         static uint64_t id;
00246     public:
00247         static uint64_t getId() {
00248             return id++;
00249         }
00250 };
00251 
00252 
00254 inline uint64_t get_id()
00255 {
00256     return _id_helper::getId();
00257 }
00258 
00260 template<typename T>
00261 std::string to_string(const T& t);
00262 
00263 template<typename T>
00264 std::string to_latex_string(const T& t);
00265 
00266 std::string to_latex_string(unsigned char c);
00267 
00269 void delete_all_files(tMSS& file_map);
00270 
00271 
00272 // thanks to Stefan Arnold for the assign functions
00274 
00278 template<class T, class U>
00279 void assign(T& x, const U& y)
00280 {
00281     x = T(y);
00282 }
00283 
00285 
00289 template<class T>
00290 void assign(T& x, T& y)
00291 {
00292     x.swap(y);
00293 }
00294 
00296 
00299 template<class T>
00300 void clear(T& x)
00301 {
00302     T y;
00303     x.swap(y);
00304 }
00305 
00307 
00315 template<class S, class P>
00316 void swap_support(S& s1, S& s2, const P* p1, const P* p2)
00317 {
00318     s1.swap(s2);
00319     s1.set_vector(p1);
00320     s2.set_vector(p2);
00321 }
00322 
00324 
00327 template<class S, class X>
00328 void init_support(S& s, const X* x)
00329 {
00330     S temp(x);                  // generate a temporary support object
00331     s.swap(temp);               // swap its content with the target object
00332     s.set_vector(x);    // set the support object's  pointer to x
00333 }
00334 
00335 template<format_type F, class X>
00336 void write_structure(const X& x, std::ostream& out)
00337 {
00338     structure_tree_node* v = new structure_tree_node();
00339     nullstream ns;
00340     x.serialize(ns, v, "");
00341     if (v->children.size() > 0) {
00342         sdsl::write_structure_tree<F>(v->children[0], out);
00343     }
00344     delete v;
00345 }
00346 
00347 } // end namespace util
00348 
00349 //==================== Template functions ====================
00350 
00351 
00352 template<class T>
00353 typename T::size_type util::get_size_in_bytes(const T& t)
00354 {
00355     if ((&t) == NULL)
00356         return 0;
00357     util::nullstream ns;
00358     return t.serialize(ns);
00359 }
00360 
00361 template<class T>
00362 double util::get_size_in_mega_bytes(const T& t)
00363 {
00364     return get_size_in_bytes(t)/(1024.0*1024.0);
00365 }
00366 
00367 template<class T>
00368 bool util::store_to_file(const T& t, const char* file_name)
00369 {
00370     std::ofstream out;
00371     out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out);
00372     if (!out)
00373         return false;
00374     t.serialize(out);
00375     out.close();
00376     return true;
00377 }
00378 
00379 inline bool util::store_to_file(const char* v, const char* file_name)
00380 {
00381     std::ofstream out;
00382     out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out);
00383     if (!out)
00384         return false;
00385     uint64_t n = strlen((const char*)v);
00386     out.write(v, n);
00387     out.close();
00388     return true;
00389 }
00390 
00391 template<uint8_t fixed_int_width, class size_type_class>
00392 bool util::store_to_file(const int_vector<fixed_int_width, size_type_class>& v, const char* file_name, bool write_fixed_as_variable)
00393 {
00394     std::ofstream out;
00395     out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out);
00396     if (!out)
00397         return false;
00398     v.serialize(out, NULL, "", write_fixed_as_variable);
00399     out.close();
00400     return true;
00401 }
00402 
00403 template<class T>
00404 bool util::load_from_file(T& v, const char* file_name)
00405 {
00406     std::ifstream in;
00407     in.open(file_name, std::ios::binary | std::ios::in);
00408     if (!in)
00409         return false;
00410     v.load(in);
00411     in.close();
00412     return true;
00413 }
00414 
00415 
00416 template<class size_type_class>
00417 bool util::load_from_int_vector_buffer(unsigned char*& text, int_vector_file_buffer<8, size_type_class>& text_buf)
00418 {
00419     text_buf.reset();
00420     size_type_class n = text_buf.int_vector_size;
00421     if (text != NULL) {
00422         delete [] text;
00423         text = NULL;
00424     }
00425     text = new unsigned char[n];
00426     for (size_type_class i=0, r_sum=0, r=text_buf.load_next_block(); r_sum < n;) {
00427         for (; i < r_sum+r; ++i) {
00428             text[i] = text_buf[i-r_sum];
00429         }
00430         r_sum += r; r = text_buf.load_next_block();
00431     }
00432     return true;
00433 }
00434 
00435 
00436 
00437 
00438 template<class int_vector_type>
00439 void util::set_random_bits(int_vector_type& v, int seed)
00440 {
00441     if (0 == seed) {
00442         srand48((int)time(NULL));
00443     } else
00444         srand48(seed);
00445 
00446     uint64_t* data = v.m_data;
00447     if (v.empty())
00448         return;
00449     *data = (((uint64_t)lrand48()&0xFFFFULL)<<48)
00450             |(((uint64_t)lrand48()&0xFFFFULL)<<32)
00451             |(((uint64_t)lrand48()&0xFFFFULL)<<16)
00452             |((uint64_t)lrand48()&0xFFFFULL);
00453     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00454         *(++data) = (((uint64_t)lrand48()&0xFFFFULL)<<48)
00455                     |(((uint64_t)lrand48()&0xFFFFULL)<<32)
00456                     |(((uint64_t)lrand48()&0xFFFFULL)<<16)
00457                     |((uint64_t)lrand48()&0xFFFFULL);
00458     }
00459 }
00460 
00461 // all elements of vector v modulo m
00462 template<class int_vector_type, class size_type_class>
00463 void util::all_elements_mod(int_vector_type& v, size_type_class m)
00464 {
00465     for (typename int_vector_type::size_type i=0; i < v.size(); ++i) {
00466         v[i] = v[i] % m;
00467     }
00468 }
00469 
00470 template<class int_vector_type>
00471 void util::set_zero_bits(int_vector_type& v)
00472 {
00473     uint64_t* data = v.m_data;
00474     if (v.empty())
00475         return;
00476     // TODO: replace by memset() but take care of size_t in the argument!
00477     *data = 0ULL;
00478     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00479         *(++data) = 0ULL;
00480     }
00481 }
00482 
00483 template<class int_vector_type>
00484 void util::set_one_bits(int_vector_type& v)
00485 {
00486     uint64_t* data = v.m_data;
00487     if (v.empty())
00488         return;
00489     *data = 0xFFFFFFFFFFFFFFFFULL;
00490     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00491         *(++data) = 0xFFFFFFFFFFFFFFFFULL;
00492     }
00493 }
00494 
00495 template<class int_vector_type>
00496 void util::bit_compress(int_vector_type& v)
00497 {
00498     typename int_vector_type::value_type max=0;
00499     for (typename int_vector_type::size_type i=0; i < v.size(); ++i) {
00500         if (v[i] > max) {
00501             max = v[i];
00502         }
00503     }
00504     uint8_t min_width = bit_magic::l1BP(max)+1;
00505     uint8_t old_width = v.get_int_width();
00506     if (old_width > min_width) {
00507         const uint64_t* read_data = v.m_data;
00508         uint64_t* write_data = v.m_data;
00509         uint8_t read_offset = 0;
00510         uint8_t write_offset = 0;
00511         for (typename int_vector_type::size_type i=0; i < v.size(); ++i) {
00512             uint64_t x = bit_magic::read_int_and_move(read_data, read_offset, old_width);
00513             bit_magic::write_int_and_move(write_data,  x, write_offset, min_width);
00514         }
00515         v.bit_resize(v.size()*min_width);
00516         v.set_int_width(min_width);
00517     }
00518 }
00519 
00520 
00521 template<class int_vector_type>
00522 void util::set_all_values_to_k(int_vector_type& v, uint64_t k)
00523 {
00524     uint64_t* data = v.m_data;
00525     if (v.empty())
00526         return;
00527     uint8_t int_width = v.m_int_width;
00528     if (int_width == 0) {
00529         throw std::logic_error("util::set_all_values_to_k can not be performed with int_width=0!");
00530     }
00531     k = k & (0xFFFFFFFFFFFFFFFFULL >> (64-int_width));
00532     uint64_t vec[67] = {0}; // allocate memory for the mask and initialize with zeros
00533     vec[0] = 0;
00534     uint8_t offset = 0;
00535     uint64_t n=0, vals=0;
00536     do { // loop terminates after at most 64 iterations
00537         vec[n] = vec[n] | (k << offset);
00538         offset += int_width;
00539         vals++;
00540         if (offset >= 64) {
00541             vec[n+1] = 0;
00542             vec[++n] = k >> (int_width-(offset-64));
00543             offset -= 64;
00544         }
00545     } while (offset != 0);
00546 
00547     typename int_vector_type::size_type n64 = v.capacity()/64;
00548     for (typename int_vector_type::size_type i=0; i < n64;) {
00549         for (uint64_t ii=0; ii < n and i < n64; ++ii,++i) {
00550             *(data++) = vec[ii];
00551         }
00552     }
00553 }
00554 
00555 
00556 template<class int_vector_type>
00557 void util::set_to_id(int_vector_type& v)
00558 {
00559     for (typename int_vector_type::size_type i=0; i < v.size(); ++i) {
00560         v[i] = i;
00561     }
00562 }
00563 
00564 template<class int_vector_type>
00565 typename int_vector_type::size_type util::get_one_bits(const int_vector_type& v)
00566 {
00567     const uint64_t* data = v.data();
00568     if (v.empty())
00569         return 0;
00570     typename int_vector_type::size_type result = bit_magic::b1Cnt(*data);
00571     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00572         result += bit_magic::b1Cnt(*(++data));
00573     }
00574     if (v.bit_size()&0x3F) {
00575         result -= bit_magic::b1Cnt((*data) & (~bit_magic::Li1Mask[v.bit_size()&0x3F]));
00576     }
00577     return result;
00578 }
00579 
00580 
00581 template<class int_vector_type>
00582 typename int_vector_type::size_type util::get_onezero_bits(const int_vector_type& v)
00583 {
00584     const uint64_t* data = v.data();
00585     if (v.empty())
00586         return 0;
00587     uint64_t carry = 0, oldcarry=0;
00588     typename int_vector_type::size_type result = bit_magic::b10Cnt(*data, carry);
00589     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00590         oldcarry = carry;
00591         result += bit_magic::b10Cnt(*(++data), carry);
00592     }
00593     if (v.bit_size()&0x3F) {// if bit_size is not a multiple of 64, substract the counts of the additional bits
00594         result -= bit_magic::b1Cnt(bit_magic::b10Map(*data, oldcarry) & bit_magic::Li0Mask[v.bit_size()&0x3F]);
00595     }
00596     return result;
00597 }
00598 
00599 template<class int_vector_type>
00600 typename int_vector_type::size_type util::get_zeroone_bits(const int_vector_type& v)
00601 {
00602     const uint64_t* data = v.data();
00603     if (v.empty())
00604         return 0;
00605     uint64_t carry = 1, oldcarry = 1;
00606     typename int_vector_type::size_type result = bit_magic::b01Cnt(*data, carry);
00607     for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) {
00608         oldcarry = carry;
00609         result += bit_magic::b01Cnt(*(++data), carry);
00610     }
00611     if (v.bit_size()&0x3F) {// if bit_size is not a multiple of 64, substract the counts of the additional bits
00612         result -= bit_magic::b1Cnt(bit_magic::b01Map(*data, oldcarry) & bit_magic::Li0Mask[v.bit_size()&0x3F]);
00613     }
00614     return result;
00615 }
00616 
00617 template<typename T>
00618 std::string util::to_string(const T& t)
00619 {
00620     std::stringstream ss;
00621     ss<<t;
00622     return ss.str();
00623 }
00624 
00625 template<typename T>
00626 std::string util::to_latex_string(const T& t)
00627 {
00628     return to_string(t);
00629 }
00630 
00631 }// end namespace sdsl
00632 
00633 #endif // end file