50 #include "EST_Ngrammar.h"
// Forward declaration of the file-local helper that is defined further down
// in this file; it returns a double and takes the array of backoff grammars
// as its first argument.
// NOTE(review): the remaining parameters are not visible in this excerpt.
52 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
// Smooths `ngram` in the steps visible below, in this order:
//   1. Good_Turing_smooth() on the grammar itself, using smooth_thresh1;
//   2. fs_build_backoff_ngrams() to fill the backoff_ngrams array from it;
//   3. fs_backoff_smooth() over the grammar, using smooth_thresh2;
//   4. release of the backoff_ngrams array.
// NOTE(review): the rest of the parameter list and the allocation of
// backoff_ngrams are not visible in this excerpt.
56 void Ngram_freqsmooth(
EST_Ngrammar &ngram,
int smooth_thresh1,
// Step 1: Good-Turing pass with the first threshold (third argument is 0).
63 Good_Turing_smooth(ngram,smooth_thresh1,0);
// Step 2: populate the backoff grammars from the (now smoothed) grammar.
65 fs_build_backoff_ngrams(backoff_ngrams,ngram);
// Step 3: backoff pass with the second threshold.
67 fs_backoff_smooth(backoff_ngrams,ngram,smooth_thresh2);
// Step 4: array form of delete -- presumably paired with a new[] in the
// lines not shown here; TODO confirm.
69 delete [] backoff_ngrams;
// Fills backoff_ngrams[0 .. ngram.order()-2] from the contents of `ngram`.
// Each slot is first initialised as a dense grammar sharing ngram's vocab and
// pred_vocab; then, for every item of every state of `ngram`, a word vector
// is accumulated with the item's frequency into each backoff grammar.
// NOTE(review): local declarations and parts of the loop headers are not
// visible in this excerpt.
73 void fs_build_backoff_ngrams(
EST_Ngrammar *backoff_ngrams,
// Slot i is initialised with i+1 as the first init() argument -- presumably
// the order, so slot 0 would be the unigram grammar; TODO confirm.
80 for (i=0; i < ngram.order()-1; i++)
81 backoff_ngrams[i].init(i+1,EST_Ngrammar::dense,
82 *ngram.vocab,*ngram.pred_vocab);
// Visit every state of the full grammar.
84 for (i=0; i < ngram.num_states(); i++)
// Loop condition of the walk over this state's pdf items: continue until
// item_end(k) reports true.
89 !ngram.p_states[i].pdf().
item_end(k);
// Query item k; presumably fills `name` and `freq` as outputs -- TODO confirm.
94 ngram.p_states[i].pdf().
item_freq(k,name,freq);
// One accumulation per backoff grammar.
96 for (j=0; j < ngram.order()-1; j++)
// NOTE(review): the index on the right-hand side does not depend on l, so
// every slot written by this loop receives the same element of `words`.
// Confirm whether an offset by l was intended.
100 for (l=0; l < j; l++)
101 nnn[l] = words(ngram.order()-1-j);
// Add this item's frequency for nnn to the j-th backoff grammar.
102 backoff_ngrams[j].accumulate(nnn,freq);
// NOTE(review): the signature of the enclosing function is not visible in
// this excerpt; judging by the names used (backoff_ngrams, ngram,
// smooth_thresh) this is presumably the body of fs_backoff_smooth -- confirm.
//
// Only the dense representation is handled; anything else is reported on cerr.
// NOTE(review): the message names "ptsmooth" although the routines in this
// file are named freqsmooth / fs_* -- confirm the wording is intended.
119 if (ngram.representation() != EST_Ngrammar::dense)
121 cerr <<
"Ngrammar: can only ptsmooth dense ngrammars" << endl;
// Visit every state; states whose pdf has fewer than smooth_thresh samples
// are the ones selected by the test below.
126 for (i=0; i < ngram.num_states(); i++)
128 if (ngram.p_states[i].pdf().
samples() < smooth_thresh)
// Record the sample count of the selected state.
131 occurs = ngram.p_states[i].pdf().
samples();
// The last slot of `words` is set to `name` before the backoff lookup.
142 words[words.
n()-1] = name;
// Backoff lookup; the remaining arguments of this call are on lines not
// visible in this excerpt.
144 fs_find_backoff_prob(backoff_ngrams,
// Looks up a probability for the trailing `order` entries of `words` in
// backoff_ngrams[order-1]. If that grammar's frequency for those entries is
// below smooth_thresh, the lookup is retried recursively with order-1;
// otherwise the grammar's probability for them is returned.
// NOTE(review): the rest of the parameter list, the declaration of nnn and
// any termination check for the recursion are not visible in this excerpt --
// confirm that order cannot reach 0 at the backoff_ngrams[order-1] accesses.
157 static double fs_find_backoff_prob(
EST_Ngrammar *backoff_ngrams,
// Copy the last `order` elements of `words` into nnn, keeping their relative
// order (the final element of words lands in nnn[order-1]).
170 for(i=0; i<order; i++)
171 nnn[order-1-i] = words(words.
n()-1-i);
// Too little data at this order: fall back to the next lower order.
173 if (backoff_ngrams[order-1].frequency(nnn) < smooth_thresh)
174 return fs_find_backoff_prob(backoff_ngrams,
175 order-1,words,smooth_thresh);
// Enough data: use this order's estimate.
177 return backoff_ngrams[order-1].probability(nnn);