SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2010 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_BWT_CONSTRUCT 00022 #define INCLUDED_SDSL_BWT_CONSTRUCT 00023 00024 #include "typedefs.hpp" 00025 #include "int_vector.hpp" 00026 #include "util.hpp" 00027 #include "testutils.hpp" 00028 00029 #include <iostream> 00030 #include <fstream> 00031 #include <stdexcept> 00032 #include <list> 00033 00034 namespace sdsl 00035 { 00036 00044 bool construct_bwt(tMSS& file_map, const std::string& dir, const std::string& id); 00045 00046 /* Constructs the Burrows and Wheeler Transform (BWT) from text and suffix array 00047 * \param file_map A map, which contains the paths of the precalculated files like suffix array or text 00048 * \param dir Directory in which the result should be written on disk. 00049 * \param id Id which should be used to build a file name for the calculated BWT. 00050 * \par Space complexity: 00051 * \f$2n\f$ bytes 00052 */ 00053 bool construct_bwt2(tMSS& file_map, const std::string& dir, const std::string& id); 00054 00055 00056 /* 00057 bool construct_bwt( tMSS &file_map, const std::string &dir, const std::string &id){ 00058 typedef int_vector<>::size_type size_type; 00059 write_R_output("csa", "construct BWT", "begin", 1, 0); 00060 if( file_map.find("bwt") == file_map.end() ){ // if bwt is not already on disk => calculate it 00061 int_vector_file_buffer<8> text_buf( file_map["text"].c_str() ); 00062 size_type n = text_buf.int_vector_size; 00063 int_vector_file_buffer<> sa_buf(file_map["sa"].c_str()); 00064 unsigned char *bwt = new unsigned char[n+1]; 00065 unsigned char *text = NULL; 00066 util::load_from_int_vector_buffer(text, text_buf); 00067 for(size_type i=0, r_sum=0, r = sa_buf.load_next_block(); r_sum < n; ){ 00068 for(; i<r_sum+r; ++i){ 00069 bwt[i] = text[ (sa_buf[i-r_sum]+n-1)%n ]; 00070 } 00071 r_sum += r; r = sa_buf.load_next_block(); 00072 } 00073 delete [] text; text = NULL; 00074 write_R_output("csa", "store BWT", "begin", 1, 0); 00075 00076 if( !util::store_to_file(char_array_serialize_wrapper<>(bwt,n), (dir+"bwt_"+id).c_str() ) ){ 00077 throw std::ios_base::failure( "#csa_construct: Cannot store bwt to file system!" ); 00078 return false; 00079 }else{ 00080 file_map["bwt"] = dir+"bwt_"+id; 00081 } 00082 delete [] bwt; bwt = NULL; 00083 write_R_output("csa", "store BWT", "end", 1, 0); 00084 } 00085 write_R_output("csa", "construct BWT", "end", 1, 0); 00086 return true; 00087 } 00088 */ 00089 00090 }// end namespace 00091 00092 #endif