Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_Ngrammar Class Reference
Collaboration diagram for EST_Ngrammar:

Public Types

enum  representation_t { sparse, dense, backoff }
 
enum  entry_t { frequencies, log_frequencies }
 

Public Member Functions

 EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist)
 
 EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist)
 
 EST_Ngrammar (int o, representation_t r, EST_Discrete &v)
 
void default_values ()
 
void clear ()
 
bool init (int o, representation_t r, const EST_StrList &wordlist)
 
bool init (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist)
 
bool init (int o, representation_t r, EST_Discrete &v)
 
bool init (int o, representation_t r, EST_Discrete &v, EST_Discrete &pv)
 
int num_states (void) const
 
double samples (void) const
 
int order () const
 
int get_vocab_length () const
 
EST_String get_vocab_word (int i) const
 
int get_vocab_word (const EST_String &s) const
 
int get_pred_vocab_length () const
 
EST_String get_pred_vocab_word (int i) const
 
int get_pred_vocab_word (const EST_String &s) const
 
int closed_vocab () const
 
entry_t entry_type () const
 
representation_t representation () const
 
bool build (const EST_StrList &filenames, const EST_String &prev=SENTENCE_START_MARKER, const EST_String &prev_prev=SENTENCE_END_MARKER, const EST_String &last=SENTENCE_END_MARKER, const EST_String &input_format="", const EST_String &oov_mode="", const int mincount=1, const int maxcount=10)
 
void accumulate (const EST_StrVector &words, const double count=1)
 
void accumulate (const EST_IVector &words, const double count=1)
 
void make_htk_compatible ()
 
EST_read_status load (const EST_String &filename)
 
EST_read_status load (const EST_String &filename, const EST_StrList &wordlist)
 
EST_write_status save (const EST_String &filename, const EST_String type="cstr_ascii", const bool trace=false, double floor=0.0)
 
int wordlist_index (const EST_String &word, const bool report=true) const
 
const EST_String & wordlist_index (int i) const
 
int predlist_index (const EST_String &word) const
 
const EST_String & predlist_index (int i) const
 
bool set_entry_type (entry_t new_type)
 
bool set_representation (representation_t new_representation)
 
double probability (const EST_StrVector &words, bool force=false, const bool trace=false) const
 
double frequency (const EST_StrVector &words, bool force=false, const bool trace=false) const
 
const EST_String & predict (const EST_StrVector &words, double *prob, int *state) const
 
const EST_String & predict (const EST_StrVector &words) const
 
const EST_String & predict (const EST_StrVector &words, double *prob) const
 
const EST_String & predict (const EST_IVector &words, double *prob, int *state) const
 
const EST_String & predict (const EST_IVector &words) const
 
const EST_String & predict (const EST_IVector &words, double *prob) const
 
int find_state_id (const EST_StrVector &words) const
 
int find_state_id (const EST_IVector &words) const
 
int find_next_state_id (int state, int word) const
 
double reverse_probability (const EST_StrVector &words, bool force=false) const
 
double reverse_probability (const EST_IVector &words, bool force=false) const
 
const EST_DiscreteProbDistribution & prob_dist (const EST_StrVector &words) const
 
const EST_DiscreteProbDistribution & prob_dist (const EST_IVector &words) const
 
const EST_DiscreteProbDistribution & prob_dist (int state) const
 
void fill_window_start (EST_IVector &window, const EST_String &prev, const EST_String &prev_prev) const
 
void fill_window_start (EST_StrVector &window, const EST_String &prev, const EST_String &prev_prev) const
 
bool ngram_exists (const EST_StrVector &words) const
 
bool ngram_exists (const EST_StrVector &words, const double threshold) const
 
const double get_backoff_weight (const EST_StrVector &words) const
 
bool set_backoff_weight (const EST_StrVector &words, const double w)
 
void print_freqs (ostream &os, double floor=0.0)
 
bool compute_backoff_weights (const int mincount=1, const int maxcount=10)
 
bool merge (EST_Ngrammar &n, float weight)
 

Protected Member Functions

bool init_sparse_representation ()
 
bool init_dense_representation ()
 
const double get_backoff_discount (const int order, const double freq) const
 
bool init_backoff_representation ()
 
void prune_backoff_representation (EST_BackoffNgrammarState *start_state=NULL)
 
void backoff_restore_unigram_states ()
 
int find_dense_state_index (const EST_IVector &words, int index=0) const
 
const EST_StrVector & make_ngram_from_index (const int i) const
 
bool init_vocab (const EST_StrList &wordlist)
 
bool init_vocab (const EST_StrList &word_list, const EST_StrList &pred_list)
 
bool check_vocab (const EST_StrList &wordlist)
 
const EST_String & lastword (const EST_StrVector &words) const
 
const int lastword (const EST_IVector &words) const
 
bool sparse_to_dense ()
 
bool dense_to_sparse ()
 
void take_logs ()
 
void take_exps ()
 
void freqs_to_probs ()
 
bool build_sparse (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last)
 
bool build_ngram (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last, const EST_String &input_format)
 
void iterate (EST_StrVector &words, void(*function)(EST_Ngrammar *n, EST_StrVector &words, void *params), void *params)
 
void const_iterate (EST_StrVector &words, void(*function)(const EST_Ngrammar *const n, EST_StrVector &words, void *params), void *params) const
 
bool p_init (int o, representation_t r)
 
bool oov_preprocess (const EST_String &filename, EST_String &new_filename, const EST_String &what)
 
const EST_NgrammarState & find_state_const (const EST_StrVector &words) const
 
EST_NgrammarState & find_state (const EST_StrVector &words)
 
const EST_NgrammarState & find_state_const (const EST_IVector &words) const
 
EST_NgrammarState & find_state (const EST_IVector &words)
 
const EST_DiscreteProbDistribution & backoff_prob_dist (const EST_StrVector &words) const
 
const double backoff_reverse_probability_sub (const EST_StrVector &words, const EST_BackoffNgrammarState *root) const
 
const double backoff_probability (const EST_StrVector &words, const bool trace=false) const
 
const double backoff_reverse_probability (const EST_StrVector &words) const
 
const EST_String & backoff_most_probable (const EST_StrVector &words, double *prob=NULL) const
 
void backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params)
 
void backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params, const int level)
 

Protected Attributes

int p_order
 
int p_num_samples
 
double p_number_of_sentences
 
EST_String p_sentence_start_marker
 
EST_String p_sentence_end_marker
 
representation_t p_representation
 
entry_t p_entry_type
 
EST_PredictionSuffixTree sparse_representation
 
EST_BackoffNgrammarState * backoff_representation
 
double backoff_threshold
 
double backoff_unigram_floor_freq
 
EST_DVector * backoff_discount
 
int p_num_states
 
EST_NgrammarState * p_states
 
EST_Discrete * vocab
 
EST_Discrete * pred_vocab
 
EST_DiscreteProbDistribution vocab_pdf
 
bool allow_oov
 

Friends

class EST_BackoffNgrammar
 
ostream & operator<< (ostream &s, EST_Ngrammar &n)
 
EST_read_status load_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n)
 
EST_read_status load_ngram_htk_binary (const EST_String filename, EST_Ngrammar &n)
 
EST_read_status load_ngram_arpa (const EST_String filename, EST_Ngrammar &n, const EST_StrList &vocab)
 
EST_read_status load_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n)
 
EST_read_status load_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n)
 
EST_write_status save_ngram_htk_ascii_sub (const EST_String &word, ostream *ost, EST_Ngrammar &n, double floor)
 
EST_write_status save_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n, double floor)
 
EST_write_status save_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor)
 
EST_write_status save_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor)
 
EST_write_status save_ngram_arpa (const EST_String filename, EST_Ngrammar &n)
 
EST_write_status save_ngram_arpa_sub (ostream *ost, EST_Ngrammar &n, const EST_StrVector &words)
 
EST_write_status save_ngram_wfst (const EST_String filename, EST_Ngrammar &n)
 
void frequency_of_frequencies (EST_DVector &ff, EST_Ngrammar &n, int this_order)
 
void map_frequencies (EST_Ngrammar &n, const EST_DVector &map, const int this_order)
 
bool Good_Turing_smooth (EST_Ngrammar &n, int maxcount, int mincount)
 
void Good_Turing_discount (EST_Ngrammar &ngrammar, const int maxcount, const double default_discount)
 
void fs_build_backoff_ngrams (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram)
 
int fs_backoff_smooth (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram, int smooth_thresh)
 

Detailed Description

Definition at line 209 of file EST_Ngrammar.h.


The documentation for this class was generated from the following files: