Public Types | |
enum | representation_t { sparse, dense, backoff } |
enum | entry_t { frequencies, log_frequencies } |
Public Member Functions | |
| EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist) |
| EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist) |
| EST_Ngrammar (int o, representation_t r, EST_Discrete &v) |
void | default_values () |
void | clear () |
bool | init (int o, representation_t r, const EST_StrList &wordlist) |
bool | init (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist) |
bool | init (int o, representation_t r, EST_Discrete &v) |
bool | init (int o, representation_t r, EST_Discrete &v, EST_Discrete &pv) |
int | num_states (void) const |
double | samples (void) const |
int | order () const |
int | get_vocab_length () const |
EST_String | get_vocab_word (int i) const |
int | get_vocab_word (const EST_String &s) const |
int | get_pred_vocab_length () const |
EST_String | get_pred_vocab_word (int i) const |
int | get_pred_vocab_word (const EST_String &s) const |
int | closed_vocab () const |
entry_t | entry_type () const |
representation_t | representation () const |
bool | build (const EST_StrList &filenames, const EST_String &prev=SENTENCE_START_MARKER, const EST_String &prev_prev=SENTENCE_END_MARKER, const EST_String &last=SENTENCE_END_MARKER, const EST_String &input_format="", const EST_String &oov_mode="", const int mincount=1, const int maxcount=10) |
void | accumulate (const EST_StrVector &words, const double count=1) |
void | accumulate (const EST_IVector &words, const double count=1) |
void | make_htk_compatible () |
EST_read_status | load (const EST_String &filename) |
EST_read_status | load (const EST_String &filename, const EST_StrList &wordlist) |
EST_write_status | save (const EST_String &filename, const EST_String type="cstr_ascii", const bool trace=false, double floor=0.0) |
int | wordlist_index (const EST_String &word, const bool report=true) const |
const EST_String & | wordlist_index (int i) const |
int | predlist_index (const EST_String &word) const |
const EST_String & | predlist_index (int i) const |
bool | set_entry_type (entry_t new_type) |
bool | set_representation (representation_t new_representation) |
double | probability (const EST_StrVector &words, bool force=false, const bool trace=false) const |
double | frequency (const EST_StrVector &words, bool force=false, const bool trace=false) const |
const EST_String & | predict (const EST_StrVector &words, double *prob, int *state) const |
const EST_String & | predict (const EST_StrVector &words) const |
const EST_String & | predict (const EST_StrVector &words, double *prob) const |
const EST_String & | predict (const EST_IVector &words, double *prob, int *state) const |
const EST_String & | predict (const EST_IVector &words) const |
const EST_String & | predict (const EST_IVector &words, double *prob) const |
int | find_state_id (const EST_StrVector &words) const |
int | find_state_id (const EST_IVector &words) const |
int | find_next_state_id (int state, int word) const |
double | reverse_probability (const EST_StrVector &words, bool force=false) const |
double | reverse_probability (const EST_IVector &words, bool force=false) const |
const EST_DiscreteProbDistribution & | prob_dist (const EST_StrVector &words) const |
const EST_DiscreteProbDistribution & | prob_dist (const EST_IVector &words) const |
const EST_DiscreteProbDistribution & | prob_dist (int state) const |
void | fill_window_start (EST_IVector &window, const EST_String &prev, const EST_String &prev_prev) const |
void | fill_window_start (EST_StrVector &window, const EST_String &prev, const EST_String &prev_prev) const |
bool | ngram_exists (const EST_StrVector &words) const |
bool | ngram_exists (const EST_StrVector &words, const double threshold) const |
const double | get_backoff_weight (const EST_StrVector &words) const |
bool | set_backoff_weight (const EST_StrVector &words, const double w) |
void | print_freqs (ostream &os, double floor=0.0) |
bool | compute_backoff_weights (const int mincount=1, const int maxcount=10) |
bool | merge (EST_Ngrammar &n, float weight) |
Protected Member Functions | |
bool | init_sparse_representation () |
bool | init_dense_representation () |
const double | get_backoff_discount (const int order, const double freq) const |
bool | init_backoff_representation () |
void | prune_backoff_representation (EST_BackoffNgrammarState *start_state=NULL) |
void | backoff_restore_unigram_states () |
int | find_dense_state_index (const EST_IVector &words, int index=0) const |
const EST_StrVector & | make_ngram_from_index (const int i) const |
bool | init_vocab (const EST_StrList &wordlist) |
bool | init_vocab (const EST_StrList &word_list, const EST_StrList &pred_list) |
bool | check_vocab (const EST_StrList &wordlist) |
const EST_String & | lastword (const EST_StrVector &words) const |
const int | lastword (const EST_IVector &words) const |
bool | sparse_to_dense () |
bool | dense_to_sparse () |
void | take_logs () |
void | take_exps () |
void | freqs_to_probs () |
bool | build_sparse (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last) |
bool | build_ngram (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last, const EST_String &input_format) |
void | iterate (EST_StrVector &words, void(*function)(EST_Ngrammar *n, EST_StrVector &words, void *params), void *params) |
void | const_iterate (EST_StrVector &words, void(*function)(const EST_Ngrammar *const n, EST_StrVector &words, void *params), void *params) const |
bool | p_init (int o, representation_t r) |
bool | oov_preprocess (const EST_String &filename, EST_String &new_filename, const EST_String &what) |
const EST_NgrammarState & | find_state_const (const EST_StrVector &words) const |
EST_NgrammarState & | find_state (const EST_StrVector &words) |
const EST_NgrammarState & | find_state_const (const EST_IVector &words) const |
EST_NgrammarState & | find_state (const EST_IVector &words) |
const EST_DiscreteProbDistribution & | backoff_prob_dist (const EST_StrVector &words) const |
const double | backoff_reverse_probability_sub (const EST_StrVector &words, const EST_BackoffNgrammarState *root) const |
const double | backoff_probability (const EST_StrVector &words, const bool trace=false) const |
const double | backoff_reverse_probability (const EST_StrVector &words) const |
const EST_String & | backoff_most_probable (const EST_StrVector &words, double *prob=NULL) const |
void | backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params) |
void | backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params, const int level) |
Protected Attributes | |
int | p_order |
int | p_num_samples |
double | p_number_of_sentences |
EST_String | p_sentence_start_marker |
EST_String | p_sentence_end_marker |
representation_t | p_representation |
entry_t | p_entry_type |
EST_PredictionSuffixTree | sparse_representation |
EST_BackoffNgrammarState * | backoff_representation |
double | backoff_threshold |
double | backoff_unigram_floor_freq |
EST_DVector * | backoff_discount |
int | p_num_states |
EST_NgrammarState * | p_states |
EST_Discrete * | vocab |
EST_Discrete * | pred_vocab |
EST_DiscreteProbDistribution | vocab_pdf |
bool | allow_oov |
Friends | |
class | EST_BackoffNgrammar |
ostream & | operator<< (ostream &s, EST_Ngrammar &n) |
EST_read_status | load_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_htk_binary (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_arpa (const EST_String filename, EST_Ngrammar &n, const EST_StrList &vocab) |
EST_read_status | load_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n) |
EST_write_status | save_ngram_htk_ascii_sub (const EST_String &word, ostream *ost, EST_Ngrammar &n, double floor) |
EST_write_status | save_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n, double floor) |
EST_write_status | save_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor) |
EST_write_status | save_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor) |
EST_write_status | save_ngram_arpa (const EST_String filename, EST_Ngrammar &n) |
EST_write_status | save_ngram_arpa_sub (ostream *ost, EST_Ngrammar &n, const EST_StrVector &words) |
EST_write_status | save_ngram_wfst (const EST_String filename, EST_Ngrammar &n) |
void | frequency_of_frequencies (EST_DVector &ff, EST_Ngrammar &n, int this_order) |
void | map_frequencies (EST_Ngrammar &n, const EST_DVector &map, const int this_order) |
bool | Good_Turing_smooth (EST_Ngrammar &n, int maxcount, int mincount) |
void | Good_Turing_discount (EST_Ngrammar &ngrammar, const int maxcount, const double default_discount) |
void | fs_build_backoff_ngrams (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram) |
int | fs_backoff_smooth (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram, int smooth_thresh) |
Definition at line 209 of file EST_Ngrammar.h.