SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
A class for the Compressed Suffix Tree (CST) proposed by Ohlebusch and Gog. More...
#include <cst_sct.hpp>
Public Types | |
typedef Csa::value_type | value_type |
typedef cst_dfs_const_forward_iterator < cst_sct > | const_iterator |
typedef const_iterator | iterator |
typedef cst_bottom_up_const_forward_iterator < cst_sct > | const_bottom_up_iterator |
typedef const_bottom_up_iterator | bottom_up_iterator |
typedef const value_type | const_reference |
typedef const_reference | reference |
typedef const_reference * | pointer |
typedef const pointer | const_pointer |
typedef Csa::size_type | size_type |
typedef size_type | cst_size_type |
typedef ptrdiff_t | difference_type |
typedef Csa | csa_type |
typedef Lcp | lcp_type |
typedef Bp_support | bp_support_type |
typedef Csa::pattern_type | pattern_type |
typedef Csa::char_type | char_type |
typedef lcp_interval< size_type > | node_type |
Type for the nodes in the tree. | |
typedef cst_tag | index_category |
Public Member Functions | |
cst_sct () | |
Default Constructor. | |
template<uint8_t int_width, class size_type_class , uint8_t int_width_1, class size_type_class_1 , uint8_t int_width_2, class size_type_class_2 > | |
cst_sct (const std::string &cst_file_name, int_vector_file_buffer< int_width, size_type_class > &lcp_buf, int_vector_file_buffer< int_width_1, size_type_class_1 > &sa_buf, int_vector_file_buffer< int_width_2, size_type_class_2 > &isa_buf, std::string dir="./", bool build_only_bps=false) | |
cst_sct (tMSS &file_map, const std::string &dir, const std::string &id, bool build_only_bps=false) | |
cst_sct (const cst_sct &cst) | |
Copy constructor. | |
void | construct (tMSS &file_map, const std::string &dir, const std::string &id, bool build_only_bps=false) |
~cst_sct () | |
Default Destructor. | |
size_type | size () const |
Number of leafs of the suffix tree. | |
bool | empty () const |
Returns if the data strucutre is empty. | |
void | swap (cst_sct< Csa, Lcp > &cst) |
Swap method for cst_sct. | |
const_iterator | begin () const |
Returns a const_iterator to the first element. | |
const_iterator | end () const |
Returns a const_iterator to the element after the last element. | |
const_bottom_up_iterator | begin_bottom_up () const |
Returns an iterator to the first element of a bottom-up traversal of the tree. | |
const_bottom_up_iterator | end_bottom_up () const |
Returns an iterator to the element after the last element of a bottom-up traversal of the tree. | |
cst_sct & | operator= (const cst_sct &cst) |
Assignment Operator. | |
bool | operator== (const cst_sct &cst) const |
Equality Operator. | |
bool | operator!= (const cst_sct &cst) const |
Unequality Operator. | |
size_type | serialize (std::ostream &out) const |
Serialize to a stream. | |
void | load (std::istream &in) |
Load from a stream. | |
node_type | root () const |
Return the root of the suffix tree. | |
bool | is_leaf (const node_type &v) const |
Decide if a node is a leaf in the suffix tree. | |
node_type | ith_leaf (size_type i) const |
Return the i-th leaf (1-based from left to right) of the suffix tree. | |
size_type | leaves_in_the_subtree (const node_type &v) const |
Calculate the number of leaves in the subtree rooted at node v. | |
node_type | leftmost_leaf_in_the_subtree (const node_type &v) const |
Calculates the leftmost leaf in the subtree rooted at node v. | |
node_type | rightmost_leaf_in_the_subtree (const node_type &v) const |
Calculates the rightmost leaf in the subtree rooted at node v. | |
size_type | lb (const node_type &v) const |
Calculates the index of the leftmost leaf in the corresponding suffix array. | |
size_type | rb (const node_type &v) const |
Calculates the index of the rightmost leaf in the corresponding suffix array. | |
node_type | parent (const node_type &v) const |
Calculate the parent node of a node v. | |
node_type | ith_child (const node_type &v, size_type i) const |
Get the ith child of a node v. | |
size_type | degree (const node_type &v) const |
Get the number of children of a node v. | |
node_type | sibling (const node_type &v) const |
Returns the next sibling of node v. | |
node_type | sibling_naive (const node_type &v) const |
node_type | child_linear (const node_type &v, unsigned char c, size_type &char_pos) const |
Get the child w of v which edge label (v,w) starts with character c. | |
node_type | child (const node_type &v, const unsigned char c, size_type &char_pos) const |
Get the child w of node v which edge label (v,w) starts with character c. | |
node_type | child (const node_type &v, const unsigned char c) |
Get the child w of node v which edge label (v,w) starts with character c. | |
unsigned char | edge (const node_type &v, size_type d) const |
Returns the d-th character (1-based indexing) of the edge-label pointing to v. | |
node_type | lca (node_type v, node_type w) const |
Calculate the lowest common ancestor (lca) of two nodes v and w of the suffix tree. | |
size_type | depth (const node_type &v) const |
Returns the string depth of node v. | |
size_type | node_depth (node_type v) const |
Returns the node depth of node v. | |
node_type | sl (const node_type &v) const |
Compute the suffix link of node v. | |
node_type | wl (const node_type &v, const unsigned char c) const |
Compute the Weiner link of node v and character c. | |
size_type | sn (const node_type &v) const |
Compute the suffix number of a leaf node v. | |
size_type | id (const node_type &v) const |
Computes a unique identification number for a node of the suffx tree in the range [0..2*size()-1]. | |
size_type | nodes () const |
Get the number of nodes of the suffix tree. | |
node_type | node (size_type lb, size_type rb, size_type l=0) const |
Get the node in the suffix tree which corresponds to the lcp-interval [lb..rb]. | |
void | print_info () const |
Print some infos about the size of the compressed suffix tree. | |
Static Public Member Functions | |
static size_type | max_size () |
Returns the largest size that cst_sct can ever have. | |
Public Attributes | |
const csa_type & | csa |
The compressed suffix array the suffix tree is based on. | |
const lcp_type & | lcp |
The lcp array the suffix tree is based on. | |
const bit_vector & | bp |
The balanced parentheses sequence of the Super-Cartesian tree the suffix tree is based on. | |
const bp_support_type & | bp_support |
The balanced parentheses sequence support for member bp. |
A class for the Compressed Suffix Tree (CST) proposed by Ohlebusch and Gog.
The CST is parameterized by
It also contains a sdsl::bit_vector which represents the balanced parentheses sequence of the Super-Cartesian tree of the lcp array. This bit_vector can be accessed via the member .
A node of the csa_sct is represented by an sdsl::lcp_interval . The size of the sdsl::cst_sct is smaller than the size of a sdsl::cst_sada since the tree topology needs only bits in contrast to the bits in sdsl::cst_sada. See the diagram below for an example which gives us the sizes for the compressed suffix arrays for the file english.50MB from the pizza and chili website.
\image html ./example/size_of_csts.png "Size comparison between cst_sct and cst_sada" \image latex ./example/size_of_csts.eps "Size comparison between cst_sct and cst_sada" width=0.7\textwidth
Applications of the CST: The compressed suffix tree could be used for string matching and many other application in sequence analysis. 17 applications are in the book "Algorithms on Strings, Trees, and Sequences" of Dan Gusfield.
const_iterator sdsl::cst_sct< Csa, Lcp, Bp_support >::begin | ( | ) | const [inline] |
bool sdsl::cst_sct< Csa, Lcp, Bp_support >::empty | ( | ) | const [inline] |
Returns if the data strucutre is empty.
Required for the Container Concept of the STL.A
const_iterator sdsl::cst_sct< Csa, Lcp, Bp_support >::end | ( | ) | const [inline] |
Returns a const_iterator to the element after the last element.
Required for the STL Container Concept.
void sdsl::cst_sct< Csa, Lcp, Bp_support >::load | ( | std::istream & | in | ) |
Load from a stream.
in | Inputstream to load the data structure from. |
static size_type sdsl::cst_sct< Csa, Lcp, Bp_support >::max_size | ( | ) | [inline, static] |
bool sdsl::cst_sct< Csa, Lcp, Bp_support >::operator!= | ( | const cst_sct< Csa, Lcp, Bp_support > & | cst | ) | const |
Unequality Operator.
Two Instances of cst_sct are equal if not all their members are equal.
cst_sct< Csa, Lcp, Bp_support > & sdsl::cst_sct< Csa, Lcp, Bp_support >::operator= | ( | const cst_sct< Csa, Lcp, Bp_support > & | cst | ) |
Assignment Operator.
Required for the Assignable Concept of the STL.
bool sdsl::cst_sct< Csa, Lcp, Bp_support >::operator== | ( | const cst_sct< Csa, Lcp, Bp_support > & | cst | ) | const |
Equality Operator.
Two Instances of cst_sct are equal if all their members are equal.
cst_sct< Csa, Lcp, Bp_support >::size_type sdsl::cst_sct< Csa, Lcp, Bp_support >::serialize | ( | std::ostream & | out | ) | const |
Serialize to a stream.
out | Outstream to write the data structure. |
size_type sdsl::cst_sct< Csa, Lcp, Bp_support >::size | ( | ) | const [inline] |
void sdsl::cst_sct< Csa, Lcp, Bp_support >::swap | ( | cst_sct< Csa, Lcp > & | cst | ) |
Swap method for cst_sct.
The swap method can be defined in terms of assignment. This requires three assignments, each of which, for a container type, is linear in the container's size. In a sense, then, a.swap(b) is redundant. This implementation guaranties a run-time complexity that is constant rather than linear.
cst | cst_sct to swap. |
Required for the Assignable Conecpt of the STL.