SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/cst_construct.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_CST_CONSTRUCT
00022 #define INCLUDED_SDSL_CST_CONSTRUCT
00023 
00024 #include "int_vector.hpp"
00025 #include "typedefs.hpp"
00026 #include "algorithms_for_suffix_array_construction.hpp"
00027 #include "algorithms_for_compressed_suffix_arrays.hpp"
00028 #include "util.hpp"
00029 #include "testutils.hpp"
00030 #include "lcp_construct.hpp"
00031 
00032 #include <iostream>
00033 #include <stdexcept>
00034 
00035 namespace sdsl
00036 {
00037 
00039 
00052 template<class Cst>
00053 bool construct_cst(std::string file_name, Cst& cst)
00054 {
00055     tMSS file_map;
00056     return construct_cst(file_name, cst, file_map, true, "./", false, "", "any");
00057 }
00058 
00060 
00075 template<class Cst>
00076 bool construct_cst(std::string file_name, Cst& cst, tMSS& file_map, bool delete_files=true, std::string dir="./", bool build_only_bps=false, std::string id="", std::string lcp_method="any")
00077 {
00078     uint64_t fs = 0;
00079     char* ccc = NULL;
00080     if ((fs=file::read_text((const char*)file_name.c_str(), ccc))>0) {
00081         typename Cst::size_type n = strlen((const char*)ccc);
00082         if (id=="")
00083             id =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str();
00084         if (fs != n+1) {
00085             std::cerr << "# WARNING: file \"" << file_name << "\" contains 0-bytes. (cst_construct) fs="<<fs<<" n="<< n << std::endl;
00086             algorithm::shift_text((char*)ccc, fs, true);
00087             n = fs-1;
00088         }
00089 
00090         if (!util::store_to_file(char_array_serialize_wrapper<>((unsigned char*)ccc,n+1), (dir+"text_"+id).c_str())) {
00091             throw std::ios_base::failure("#csa_construct: Cannot store text to file system!");
00092             delete [] ccc;
00093             return false;
00094         } else {
00095             file_map["text"] = (dir+"text_"+id).c_str();
00096         }
00097         delete [] ccc;
00098         return construct_cst(cst, file_map, delete_files, dir, build_only_bps, id, lcp_method);
00099     }
00100     return false;
00101 }
00102 
00103 template<class Cst>
00104 static bool construct_cst(Cst& cst, tMSS& file_map, bool delete_files=true, std::string dir="./", bool build_only_bps=false, std::string id="", std::string lcp_method="any")
00105 {
00106     typedef typename Cst::csa_type csa_type;
00107     typedef int_vector<>::size_type size_type;
00108     if (id == "")
00109         id =  util::to_string(util::get_pid())+"_"+util::to_string(util::get_id()).c_str();
00110     write_R_output("cst","construct CST", "begin", 1, 0);
00111     {
00112         csa_type csa;
00113         construct_csa(csa, file_map, false, dir, id);
00114         if (!util::store_to_file(csa, (dir+"csa_"+id).c_str())) {
00115             throw std::ios_base::failure("cst_construct: Cannot store CSA to file system!");
00116             return false;
00117         } else {
00118             file_map["csa"] = dir+"csa_"+id;
00119         }
00120     }
00121 
00122     {
00123         // test if lcp is already calculated
00124         std::ifstream in((dir+"lcp_"+id).c_str());
00125         if (in) {
00126             file_map["lcp"] = dir+"lcp_"+id;
00127         }
00128     }
00129 
00130     if (file_map.find("lcp") == file_map.end()) {
00131         if (lcp_method=="any") {
00132             if (file_map.find("isa") == file_map.end()) { // if isa is not already on disk => use PHI algorithm for lcp
00133                 if (!construct_lcp_PHI(file_map, dir, id))
00134                     return false;
00135             } else {
00136                 if (!construct_lcp_kasai(file_map, dir, id))
00137                     return false;
00138             }
00139             //                  construct_lcp_go(file_map, dir, id);
00140         } else {
00141             if (lcp_method=="go")
00142                 construct_lcp_go(file_map, dir, id);
00143             else if (lcp_method=="go2") {
00144                 construct_lcp_go2(file_map, dir, id);
00145             } else if (lcp_method=="goPHI") {
00146                 construct_lcp_goPHI(file_map, dir, id);
00147             } else if (lcp_method=="PHI")
00148                 construct_lcp_PHI(file_map, dir, id);
00149             else if (lcp_method=="PHIse")
00150                 construct_lcp_PHI(file_map, dir, id, true);
00151             else if (lcp_method=="simple_n5")
00152                 construct_lcp_simple_5n(file_map, dir, id);
00153             else if (lcp_method=="simple2_n9")
00154                 construct_lcp_simple2_9n(file_map, dir, id);
00155             else if (lcp_method=="sparse_phi")
00156                 construct_lcp_semi_extern_PHI(file_map, dir, id);
00157             else
00158                 construct_lcp_kasai(file_map, dir, id);
00159         }
00160     }
00161     /*          {
00162                         int_vector<> lcp_old;
00163                         util::load_from_file(lcp_old, file_map["lcp"].c_str());
00164                         int_vector<8> lcp_sml;
00165                         util::load_from_file(lcp_sml, file_map["lcp_go"].c_str());
00166                         for(size_type i=0; i<lcp_old.size(); ++i){
00167                                 if( lcp_old[i] < 255 ){
00168                                         if( lcp_old[i] != lcp_sml[i] ){
00169                                                 std::cout<<"ERROR i="<<i<<" "<<lcp_old[i]<<" "<<(size_type)lcp_sml[i]<<std::endl;
00170                                         }
00171                                 }
00172                         }
00173                 }
00174     */
00175 
00176 
00177     {
00178         cst.construct(file_map, dir, id, build_only_bps);
00179     }
00180     if (delete_files) {
00181         util::delete_all_files(file_map);
00182     }
00183     write_R_output("cst","construct CST", "end", 1, 0);
00184     return true;
00185 }
00186 
00187 }// end namespace
00188 
00189 #endif