SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2010 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_CST_CONSTRUCT 00022 #define INCLUDED_SDSL_CST_CONSTRUCT 00023 00024 #include "int_vector.hpp" 00025 #include "typedefs.hpp" 00026 #include "algorithms_for_suffix_array_construction.hpp" 00027 #include "algorithms_for_compressed_suffix_arrays.hpp" 00028 #include "util.hpp" 00029 #include "testutils.hpp" 00030 #include "lcp_construct.hpp" 00031 00032 #include <iostream> 00033 #include <stdexcept> 00034 00035 namespace sdsl 00036 { 00037 00039 00052 template<class Cst> 00053 bool construct_cst(std::string file_name, Cst& cst) 00054 { 00055 tMSS file_map; 00056 return construct_cst(file_name, cst, file_map, true, "./", false, "", "any"); 00057 } 00058 00060 00075 template<class Cst> 00076 bool construct_cst(std::string file_name, Cst& cst, tMSS& file_map, bool delete_files=true, std::string dir="./", bool build_only_bps=false, std::string id="", std::string lcp_method="any") 00077 { 00078 uint64_t fs = 0; 00079 char* ccc = NULL; 00080 if ((fs=file::read_text((const char*)file_name.c_str(), ccc))>0) { 00081 typename Cst::size_type n = strlen((const char*)ccc); 00082 if (id=="") 00083 id = util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str(); 00084 if (fs != n+1) { 00085 std::cerr << "# WARNING: file \"" << file_name << "\" contains 0-bytes. (cst_construct) fs="<<fs<<" n="<< n << std::endl; 00086 algorithm::shift_text((char*)ccc, fs, true); 00087 n = fs-1; 00088 } 00089 00090 if (!util::store_to_file(char_array_serialize_wrapper<>((unsigned char*)ccc,n+1), (dir+"text_"+id).c_str())) { 00091 throw std::ios_base::failure("#csa_construct: Cannot store text to file system!"); 00092 delete [] ccc; 00093 return false; 00094 } else { 00095 file_map["text"] = (dir+"text_"+id).c_str(); 00096 } 00097 delete [] ccc; 00098 return construct_cst(cst, file_map, delete_files, dir, build_only_bps, id, lcp_method); 00099 } 00100 return false; 00101 } 00102 00103 template<class Cst> 00104 static bool construct_cst(Cst& cst, tMSS& file_map, bool delete_files=true, std::string dir="./", bool build_only_bps=false, std::string id="", std::string lcp_method="any") 00105 { 00106 typedef typename Cst::csa_type csa_type; 00107 typedef int_vector<>::size_type size_type; 00108 if (id == "") 00109 id = util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str(); 00110 write_R_output("cst","construct CST", "begin", 1, 0); 00111 { 00112 csa_type csa; 00113 construct_csa(csa, file_map, false, dir, id); 00114 if (!util::store_to_file(csa, (dir+"csa_"+id).c_str())) { 00115 throw std::ios_base::failure("cst_construct: Cannot store CSA to file system!"); 00116 return false; 00117 } else { 00118 file_map["csa"] = dir+"csa_"+id; 00119 } 00120 } 00121 00122 { 00123 // test if lcp is already calculated 00124 std::ifstream in((dir+"lcp_"+id).c_str()); 00125 if (in) { 00126 file_map["lcp"] = dir+"lcp_"+id; 00127 } 00128 } 00129 00130 if (file_map.find("lcp") == file_map.end()) { 00131 if (lcp_method=="any") { 00132 if (file_map.find("isa") == file_map.end()) { // if isa is not already on disk => use PHI algorithm for lcp 00133 if (!construct_lcp_PHI(file_map, dir, id)) 00134 return false; 00135 } else { 00136 if (!construct_lcp_kasai(file_map, dir, id)) 00137 return false; 00138 } 00139 // construct_lcp_go(file_map, dir, id); 00140 } else { 00141 if (lcp_method=="go") 00142 construct_lcp_go(file_map, dir, id); 00143 else if (lcp_method=="go2") { 00144 construct_lcp_go2(file_map, dir, id); 00145 } else if (lcp_method=="goPHI") { 00146 construct_lcp_goPHI(file_map, dir, id); 00147 } else if (lcp_method=="PHI") 00148 construct_lcp_PHI(file_map, dir, id); 00149 else if (lcp_method=="PHIse") 00150 construct_lcp_PHI(file_map, dir, id, true); 00151 else if (lcp_method=="simple_n5") 00152 construct_lcp_simple_5n(file_map, dir, id); 00153 else if (lcp_method=="simple2_n9") 00154 construct_lcp_simple2_9n(file_map, dir, id); 00155 else if (lcp_method=="sparse_phi") 00156 construct_lcp_semi_extern_PHI(file_map, dir, id); 00157 else 00158 construct_lcp_kasai(file_map, dir, id); 00159 } 00160 } 00161 /* { 00162 int_vector<> lcp_old; 00163 util::load_from_file(lcp_old, file_map["lcp"].c_str()); 00164 int_vector<8> lcp_sml; 00165 util::load_from_file(lcp_sml, file_map["lcp_go"].c_str()); 00166 for(size_type i=0; i<lcp_old.size(); ++i){ 00167 if( lcp_old[i] < 255 ){ 00168 if( lcp_old[i] != lcp_sml[i] ){ 00169 std::cout<<"ERROR i="<<i<<" "<<lcp_old[i]<<" "<<(size_type)lcp_sml[i]<<std::endl; 00170 } 00171 } 00172 } 00173 } 00174 */ 00175 00176 00177 { 00178 cst.construct(file_map, dir, id, build_only_bps); 00179 } 00180 if (delete_files) { 00181 util::delete_all_files(file_map); 00182 } 00183 write_R_output("cst","construct CST", "end", 1, 0); 00184 return true; 00185 } 00186 00187 }// end namespace 00188 00189 #endif