SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
 All Classes Namespaces Files Functions Variables Typedefs Friends
sdsl/include/sdsl/bwt_construct.hpp
Go to the documentation of this file.
00001 /* sdsl - succinct data structures library
00002     Copyright (C) 2010 Simon Gog
00003 
00004     This program is free software: you can redistribute it and/or modify
00005     it under the terms of the GNU General Public License as published by
00006     the Free Software Foundation, either version 3 of the License, or
00007     (at your option) any later version.
00008 
00009     This program is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012     GNU General Public License for more details.
00013 
00014     You should have received a copy of the GNU General Public License
00015     along with this program.  If not, see http://www.gnu.org/licenses/ .
00016 */
00021 #ifndef INCLUDED_SDSL_BWT_CONSTRUCT
00022 #define INCLUDED_SDSL_BWT_CONSTRUCT
00023 
00024 #include "typedefs.hpp"
00025 #include "int_vector.hpp"
00026 #include "util.hpp"
00027 #include "testutils.hpp"
00028 
00029 #include <iostream>
00030 #include <fstream>
00031 #include <stdexcept>
00032 #include <list>
00033 
00034 namespace sdsl
00035 {
00036 
00044 bool construct_bwt(tMSS& file_map, const std::string& dir, const std::string& id);
00045 
00046 /* Constructs the Burrows and Wheeler Transform (BWT) from text and suffix array
00047  * \param file_map A map, which contains the paths of the precalculated files like suffix array or text
00048  * \param dir      Directory in which the result should be written on disk.
00049  * \param id       Id which should be used to build a file name for the calculated BWT.
00050  * \par Space complexity:
00051  *        \f$2n\f$ bytes
00052  */
00053 bool construct_bwt2(tMSS& file_map, const std::string& dir, const std::string& id);
00054 
00055 
00056 /*
00057         bool construct_bwt( tMSS &file_map, const std::string &dir, const std::string &id){
00058                 typedef int_vector<>::size_type size_type;
00059                 write_R_output("csa", "construct BWT", "begin", 1, 0);
00060                 if( file_map.find("bwt") == file_map.end() ){ // if bwt is not already on disk => calculate it
00061                         int_vector_file_buffer<8> text_buf( file_map["text"].c_str() );
00062                         size_type n = text_buf.int_vector_size;
00063                         int_vector_file_buffer<> sa_buf(file_map["sa"].c_str());
00064                         unsigned char *bwt = new unsigned char[n+1];
00065                         unsigned char *text = NULL;
00066                         util::load_from_int_vector_buffer(text, text_buf);
00067                         for(size_type i=0, r_sum=0, r = sa_buf.load_next_block(); r_sum < n; ){
00068                                 for(; i<r_sum+r; ++i){
00069                                         bwt[i] = text[ (sa_buf[i-r_sum]+n-1)%n ];
00070                                 }
00071                                 r_sum += r; r = sa_buf.load_next_block();
00072                         }
00073                         delete [] text; text = NULL;
00074                         write_R_output("csa", "store BWT", "begin", 1, 0);
00075 
00076                         if( !util::store_to_file(char_array_serialize_wrapper<>(bwt,n), (dir+"bwt_"+id).c_str() ) ){
00077                                 throw std::ios_base::failure( "#csa_construct: Cannot store bwt to file system!" );
00078                                 return false;
00079                         }else{
00080                                 file_map["bwt"] = dir+"bwt_"+id;
00081                         }
00082                         delete [] bwt; bwt = NULL;
00083                         write_R_output("csa", "store BWT", "end", 1, 0);
00084                 }
00085                 write_R_output("csa", "construct BWT", "end", 1, 0);
00086                 return true;
00087         }
00088 */
00089 
00090 }// end namespace
00091 
00092 #endif