SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2009 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_UTIL 00022 #define INCLUDED_SDSL_UTIL 00023 00024 #include "bitmagic.hpp" 00025 #include "typedefs.hpp" 00026 #include "structure_tree.hpp" 00027 #include <iosfwd> // forward declaration of ostream 00028 #include <stdint.h> // for uint64_t uint32_t declaration 00029 #include <cassert> 00030 #include <fstream> // file stream for storeToFile and loadFromFile 00031 #include <ctime> // for rand initialization 00032 #include <string> 00033 #include <string.h> // for strlen and strdup 00034 #include <libgen.h> // for basename 00035 #include <cstdlib> 00036 #include <unistd.h> // for getpid 00037 #include <sstream> // for to_string method 00038 #include <stdexcept> // for std::logic_error 00039 #include <typeinfo> // for typeid 00040 00041 00042 // macros to transform a defined name to a string 00043 #define SDSL_STR(x) #x 00044 #define SDSL_XSTR(s) SDSL_STR(s) 00045 00047 namespace sdsl 00048 { 00049 00050 //class structure_tree_node; // forward declaration of data structure in structure_tree.hpp 00051 //class structure_tree; // forward declaration of data structure in structure.hpp 00052 00053 template<uint8_t, class size_type_class> 00054 class int_vector_file_buffer; // forward declaration 00055 00056 template<uint8_t, class size_type_class> 00057 class int_vector; // forward declaration 00058 00060 namespace util 00061 { 00062 00063 static bool verbose = false; 00064 00065 void set_verbose(); 00066 00068 std::string basename(const std::string& file_name); 00069 00071 std::string dirname(const std::string& file_name); 00072 00074 00079 template<class int_vector_type> 00080 void set_random_bits(int_vector_type& v, int seed=0); 00082 template<class int_vector_type> 00083 void set_zero_bits(int_vector_type& v); 00085 template<class int_vector_type> 00086 void set_one_bits(int_vector_type& v); 00087 00089 00093 template<class int_vector_type> 00094 void bit_compress(int_vector_type& v); 00095 00097 template<class int_vector_type, class size_type_class> 00098 void all_elements_mod(int_vector_type& v, size_type_class m); 00099 00100 00102 00108 template<class int_vector_type> 00109 void set_all_values_to_k(int_vector_type& v, uint64_t k); 00110 00112 template<class int_vector_type> 00113 void set_to_id(int_vector_type& v); 00114 00116 00119 template<class int_vector_type> 00120 typename int_vector_type::size_type get_one_bits(const int_vector_type& v); 00121 00123 00125 template<class int_vector_type> 00126 typename int_vector_type::size_type get_onezero_bits(const int_vector_type& v); 00127 00129 00131 template <class int_vector_type> 00132 typename int_vector_type::size_type get_zeroone_bits(const int_vector_type& v); 00133 00135 00139 template<class T> 00140 bool load_from_file(T& v, const char* file_name); 00141 00142 template<> 00143 bool load_from_file(void*&, const char* file_name); 00144 00145 template<class size_type_class> 00146 bool load_from_int_vector_buffer(unsigned char*& text, int_vector_file_buffer<8, size_type_class>& text_buf); 00147 00149 /* \pre v=NULL 00150 * 00151 */ 00152 bool load_from_file(char*& v, const char* file_name); 00153 00155 00160 template<class T> 00161 bool store_to_file(const T& v, const char* file_name); 00162 00164 bool store_to_file(const char* v, const char* file_name); 00165 00167 template<uint8_t fixed_int_width, class size_type_class> 00168 bool store_to_file(const int_vector<fixed_int_width, size_type_class>& v, const char* file_name, bool write_fixed_as_variable=false); 00169 00171 00174 std::string demangle(const char* name); 00175 00177 std::string demangle2(const char* name); 00178 00179 template<class T> 00180 std::string class_name(const T& t) 00181 { 00182 std::string result = demangle2(typeid(t).name()); 00183 size_t template_pos = result.find("<"); 00184 if (template_pos != std::string::npos) { 00185 result = result.erase(template_pos); 00186 } 00187 return result; 00188 } 00189 00191 00194 template<class T> 00195 typename T::size_type get_size_in_bytes(const T& t); 00196 00198 00201 template<class T> 00202 double get_size_in_mega_bytes(const T& t); 00203 00204 struct nullstream : std::ostream { 00205 struct nullbuf: std::streambuf { 00206 int overflow(int c) { 00207 return traits_type::not_eof(c); 00208 } 00209 } m_sbuf; 00210 nullstream(): std::ios(&m_sbuf), std::ostream(&m_sbuf), m_sbuf() {} 00211 }; 00212 00213 // Writes primitive-typed variable t to stream out 00214 template<class T> 00215 size_t write_member(const T& t, std::ostream& out, sdsl::structure_tree_node* v=NULL, std::string name="") 00216 { 00217 sdsl::structure_tree_node* child = sdsl::structure_tree::add_child(v, name, util::class_name(t)); 00218 out.write((char*)&t, sizeof(t)); 00219 size_t written_bytes = sizeof(t); 00220 sdsl::structure_tree::add_size(child, written_bytes); 00221 return written_bytes; 00222 } 00223 00224 // Specialization for std::string 00225 template<> 00226 size_t write_member<std::string>(const std::string& t, std::ostream& out, structure_tree_node* v, std::string name); 00227 00228 // Writes primitive-typed variable t to stream out 00229 template<class T> 00230 void read_member(T& t, std::istream& in) 00231 { 00232 in.read((char*)&t, sizeof(t)); 00233 } 00234 00235 // Specialization for std::string 00236 template<> 00237 void read_member<std::string>(std::string& t, std::istream& in); 00238 00240 uint64_t get_pid(); 00241 00242 class _id_helper 00243 { 00244 private: 00245 static uint64_t id; 00246 public: 00247 static uint64_t getId() { 00248 return id++; 00249 } 00250 }; 00251 00252 00254 inline uint64_t get_id() 00255 { 00256 return _id_helper::getId(); 00257 } 00258 00260 template<typename T> 00261 std::string to_string(const T& t); 00262 00263 template<typename T> 00264 std::string to_latex_string(const T& t); 00265 00266 std::string to_latex_string(unsigned char c); 00267 00269 void delete_all_files(tMSS& file_map); 00270 00271 00272 // thanks to Stefan Arnold for the assign functions 00274 00278 template<class T, class U> 00279 void assign(T& x, const U& y) 00280 { 00281 x = T(y); 00282 } 00283 00285 00289 template<class T> 00290 void assign(T& x, T& y) 00291 { 00292 x.swap(y); 00293 } 00294 00296 00299 template<class T> 00300 void clear(T& x) 00301 { 00302 T y; 00303 x.swap(y); 00304 } 00305 00307 00315 template<class S, class P> 00316 void swap_support(S& s1, S& s2, const P* p1, const P* p2) 00317 { 00318 s1.swap(s2); 00319 s1.set_vector(p1); 00320 s2.set_vector(p2); 00321 } 00322 00324 00327 template<class S, class X> 00328 void init_support(S& s, const X* x) 00329 { 00330 S temp(x); // generate a temporary support object 00331 s.swap(temp); // swap its content with the target object 00332 s.set_vector(x); // set the support object's pointer to x 00333 } 00334 00335 template<format_type F, class X> 00336 void write_structure(const X& x, std::ostream& out) 00337 { 00338 structure_tree_node* v = new structure_tree_node(); 00339 nullstream ns; 00340 x.serialize(ns, v, ""); 00341 if (v->children.size() > 0) { 00342 sdsl::write_structure_tree<F>(v->children[0], out); 00343 } 00344 delete v; 00345 } 00346 00347 } // end namespace util 00348 00349 //==================== Template functions ==================== 00350 00351 00352 template<class T> 00353 typename T::size_type util::get_size_in_bytes(const T& t) 00354 { 00355 if ((&t) == NULL) 00356 return 0; 00357 util::nullstream ns; 00358 return t.serialize(ns); 00359 } 00360 00361 template<class T> 00362 double util::get_size_in_mega_bytes(const T& t) 00363 { 00364 return get_size_in_bytes(t)/(1024.0*1024.0); 00365 } 00366 00367 template<class T> 00368 bool util::store_to_file(const T& t, const char* file_name) 00369 { 00370 std::ofstream out; 00371 out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out); 00372 if (!out) 00373 return false; 00374 t.serialize(out); 00375 out.close(); 00376 return true; 00377 } 00378 00379 inline bool util::store_to_file(const char* v, const char* file_name) 00380 { 00381 std::ofstream out; 00382 out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out); 00383 if (!out) 00384 return false; 00385 uint64_t n = strlen((const char*)v); 00386 out.write(v, n); 00387 out.close(); 00388 return true; 00389 } 00390 00391 template<uint8_t fixed_int_width, class size_type_class> 00392 bool util::store_to_file(const int_vector<fixed_int_width, size_type_class>& v, const char* file_name, bool write_fixed_as_variable) 00393 { 00394 std::ofstream out; 00395 out.open(file_name, std::ios::binary | std::ios::trunc | std::ios::out); 00396 if (!out) 00397 return false; 00398 v.serialize(out, NULL, "", write_fixed_as_variable); 00399 out.close(); 00400 return true; 00401 } 00402 00403 template<class T> 00404 bool util::load_from_file(T& v, const char* file_name) 00405 { 00406 std::ifstream in; 00407 in.open(file_name, std::ios::binary | std::ios::in); 00408 if (!in) 00409 return false; 00410 v.load(in); 00411 in.close(); 00412 return true; 00413 } 00414 00415 00416 template<class size_type_class> 00417 bool util::load_from_int_vector_buffer(unsigned char*& text, int_vector_file_buffer<8, size_type_class>& text_buf) 00418 { 00419 text_buf.reset(); 00420 size_type_class n = text_buf.int_vector_size; 00421 if (text != NULL) { 00422 delete [] text; 00423 text = NULL; 00424 } 00425 text = new unsigned char[n]; 00426 for (size_type_class i=0, r_sum=0, r=text_buf.load_next_block(); r_sum < n;) { 00427 for (; i < r_sum+r; ++i) { 00428 text[i] = text_buf[i-r_sum]; 00429 } 00430 r_sum += r; r = text_buf.load_next_block(); 00431 } 00432 return true; 00433 } 00434 00435 00436 00437 00438 template<class int_vector_type> 00439 void util::set_random_bits(int_vector_type& v, int seed) 00440 { 00441 if (0 == seed) { 00442 srand48((int)time(NULL)); 00443 } else 00444 srand48(seed); 00445 00446 uint64_t* data = v.m_data; 00447 if (v.empty()) 00448 return; 00449 *data = (((uint64_t)lrand48()&0xFFFFULL)<<48) 00450 |(((uint64_t)lrand48()&0xFFFFULL)<<32) 00451 |(((uint64_t)lrand48()&0xFFFFULL)<<16) 00452 |((uint64_t)lrand48()&0xFFFFULL); 00453 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00454 *(++data) = (((uint64_t)lrand48()&0xFFFFULL)<<48) 00455 |(((uint64_t)lrand48()&0xFFFFULL)<<32) 00456 |(((uint64_t)lrand48()&0xFFFFULL)<<16) 00457 |((uint64_t)lrand48()&0xFFFFULL); 00458 } 00459 } 00460 00461 // all elements of vector v modulo m 00462 template<class int_vector_type, class size_type_class> 00463 void util::all_elements_mod(int_vector_type& v, size_type_class m) 00464 { 00465 for (typename int_vector_type::size_type i=0; i < v.size(); ++i) { 00466 v[i] = v[i] % m; 00467 } 00468 } 00469 00470 template<class int_vector_type> 00471 void util::set_zero_bits(int_vector_type& v) 00472 { 00473 uint64_t* data = v.m_data; 00474 if (v.empty()) 00475 return; 00476 // TODO: replace by memset() but take care of size_t in the argument! 00477 *data = 0ULL; 00478 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00479 *(++data) = 0ULL; 00480 } 00481 } 00482 00483 template<class int_vector_type> 00484 void util::set_one_bits(int_vector_type& v) 00485 { 00486 uint64_t* data = v.m_data; 00487 if (v.empty()) 00488 return; 00489 *data = 0xFFFFFFFFFFFFFFFFULL; 00490 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00491 *(++data) = 0xFFFFFFFFFFFFFFFFULL; 00492 } 00493 } 00494 00495 template<class int_vector_type> 00496 void util::bit_compress(int_vector_type& v) 00497 { 00498 typename int_vector_type::value_type max=0; 00499 for (typename int_vector_type::size_type i=0; i < v.size(); ++i) { 00500 if (v[i] > max) { 00501 max = v[i]; 00502 } 00503 } 00504 uint8_t min_width = bit_magic::l1BP(max)+1; 00505 uint8_t old_width = v.get_int_width(); 00506 if (old_width > min_width) { 00507 const uint64_t* read_data = v.m_data; 00508 uint64_t* write_data = v.m_data; 00509 uint8_t read_offset = 0; 00510 uint8_t write_offset = 0; 00511 for (typename int_vector_type::size_type i=0; i < v.size(); ++i) { 00512 uint64_t x = bit_magic::read_int_and_move(read_data, read_offset, old_width); 00513 bit_magic::write_int_and_move(write_data, x, write_offset, min_width); 00514 } 00515 v.bit_resize(v.size()*min_width); 00516 v.set_int_width(min_width); 00517 } 00518 } 00519 00520 00521 template<class int_vector_type> 00522 void util::set_all_values_to_k(int_vector_type& v, uint64_t k) 00523 { 00524 uint64_t* data = v.m_data; 00525 if (v.empty()) 00526 return; 00527 uint8_t int_width = v.m_int_width; 00528 if (int_width == 0) { 00529 throw std::logic_error("util::set_all_values_to_k can not be performed with int_width=0!"); 00530 } 00531 k = k & (0xFFFFFFFFFFFFFFFFULL >> (64-int_width)); 00532 uint64_t vec[67] = {0}; // allocate memory for the mask and initialize with zeros 00533 vec[0] = 0; 00534 uint8_t offset = 0; 00535 uint64_t n=0, vals=0; 00536 do { // loop terminates after at most 64 iterations 00537 vec[n] = vec[n] | (k << offset); 00538 offset += int_width; 00539 vals++; 00540 if (offset >= 64) { 00541 vec[n+1] = 0; 00542 vec[++n] = k >> (int_width-(offset-64)); 00543 offset -= 64; 00544 } 00545 } while (offset != 0); 00546 00547 typename int_vector_type::size_type n64 = v.capacity()/64; 00548 for (typename int_vector_type::size_type i=0; i < n64;) { 00549 for (uint64_t ii=0; ii < n and i < n64; ++ii,++i) { 00550 *(data++) = vec[ii]; 00551 } 00552 } 00553 } 00554 00555 00556 template<class int_vector_type> 00557 void util::set_to_id(int_vector_type& v) 00558 { 00559 for (typename int_vector_type::size_type i=0; i < v.size(); ++i) { 00560 v[i] = i; 00561 } 00562 } 00563 00564 template<class int_vector_type> 00565 typename int_vector_type::size_type util::get_one_bits(const int_vector_type& v) 00566 { 00567 const uint64_t* data = v.data(); 00568 if (v.empty()) 00569 return 0; 00570 typename int_vector_type::size_type result = bit_magic::b1Cnt(*data); 00571 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00572 result += bit_magic::b1Cnt(*(++data)); 00573 } 00574 if (v.bit_size()&0x3F) { 00575 result -= bit_magic::b1Cnt((*data) & (~bit_magic::Li1Mask[v.bit_size()&0x3F])); 00576 } 00577 return result; 00578 } 00579 00580 00581 template<class int_vector_type> 00582 typename int_vector_type::size_type util::get_onezero_bits(const int_vector_type& v) 00583 { 00584 const uint64_t* data = v.data(); 00585 if (v.empty()) 00586 return 0; 00587 uint64_t carry = 0, oldcarry=0; 00588 typename int_vector_type::size_type result = bit_magic::b10Cnt(*data, carry); 00589 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00590 oldcarry = carry; 00591 result += bit_magic::b10Cnt(*(++data), carry); 00592 } 00593 if (v.bit_size()&0x3F) {// if bit_size is not a multiple of 64, substract the counts of the additional bits 00594 result -= bit_magic::b1Cnt(bit_magic::b10Map(*data, oldcarry) & bit_magic::Li0Mask[v.bit_size()&0x3F]); 00595 } 00596 return result; 00597 } 00598 00599 template<class int_vector_type> 00600 typename int_vector_type::size_type util::get_zeroone_bits(const int_vector_type& v) 00601 { 00602 const uint64_t* data = v.data(); 00603 if (v.empty()) 00604 return 0; 00605 uint64_t carry = 1, oldcarry = 1; 00606 typename int_vector_type::size_type result = bit_magic::b01Cnt(*data, carry); 00607 for (typename int_vector_type::size_type i=1; i < (v.capacity()>>6); ++i) { 00608 oldcarry = carry; 00609 result += bit_magic::b01Cnt(*(++data), carry); 00610 } 00611 if (v.bit_size()&0x3F) {// if bit_size is not a multiple of 64, substract the counts of the additional bits 00612 result -= bit_magic::b1Cnt(bit_magic::b01Map(*data, oldcarry) & bit_magic::Li0Mask[v.bit_size()&0x3F]); 00613 } 00614 return result; 00615 } 00616 00617 template<typename T> 00618 std::string util::to_string(const T& t) 00619 { 00620 std::stringstream ss; 00621 ss<<t; 00622 return ss.str(); 00623 } 00624 00625 template<typename T> 00626 std::string util::to_latex_string(const T& t) 00627 { 00628 return to_string(t); 00629 } 00630 00631 }// end namespace sdsl 00632 00633 #endif // end file