45 #include "EST_String.h"
47 #include "EST_simplestats.h"
56 const int est_64to32(
void *c)
68 for (i=0,d=0,x=1; i<24; i++)
79 #define tprob_int(X) (est_64to32(X))
82 EST_DiscreteProbDistribution::EST_DiscreteProbDistribution(
const EST_Discrete *d,
85 type = tprob_discrete;
87 num_samples = n_samples;
100 num_samples = b.num_samples;
101 discrete = b.discrete;
122 type = tprob_discrete;
126 icounts.
resize(vocab.length());
127 for (i=0; i<icounts.
length(); i++)
137 type = tprob_discrete;
141 for (i=0; i<icounts.
length(); i++)
147 icounts[tprob_int(i)] += count;
148 num_samples += count;
154 num_samples += count;
161 if (type == tprob_discrete)
163 int idx = discrete->
index(s);
164 icounts[idx] += count;
168 for (p=scounts.
list.head(); p != 0; p=p->next())
170 if (scounts.
list(p).k == s)
172 scounts.
list(p).v += count;
179 num_samples += count;
187 if (type == tprob_discrete)
190 for (i=0; i < icounts.
length(); i++)
205 *prob = probability(pt);
206 return discrete->
name(pt);
212 for (p=scounts.
list.head(); p != 0; p=p->next())
213 if (scounts.
list(p).v > max)
216 max = scounts.
list(p).v;
227 *prob = (double)max/num_samples;
228 return scounts.
list(t).k;
233 double EST_DiscreteProbDistribution::probability(
const EST_String &s)
const
235 if (frequency(s) == 0.0)
238 return (
double)frequency(s)/num_samples;
241 double EST_DiscreteProbDistribution::probability(
const int i)
const
243 if (frequency(i) == 0.0)
246 return (
double)frequency(i)/num_samples;
249 double EST_DiscreteProbDistribution::frequency(
const EST_String &s)
const
251 if (type == tprob_discrete)
257 double EST_DiscreteProbDistribution::frequency(
const int i)
const
259 if (type == tprob_discrete)
263 cerr <<
"ProbDistribution: can't access string type pd with int\n";
270 if (type == tprob_discrete)
278 num_samples -= scounts.
val_def(s,0);
286 if (type == tprob_discrete)
288 num_samples -= icounts[i];
294 cerr <<
"ProbDistribution: can't access string type pd with int\n";
301 if (type == tprob_discrete)
303 num_samples -= icounts[tprob_int(i)];
305 icounts[tprob_int(i)] = c;
309 cerr <<
"ProbDistribution: can't access string type pd with int\n";
317 if (type == tprob_discrete)
325 if (type == tprob_discrete)
328 cerr <<
"ProbDistribution: can't access string type pd with int\n";
333 if (type == tprob_discrete)
334 icounts[tprob_int(i)] = c;
336 cerr <<
"ProbDistribution: can't access string type pd with int\n";
346 if (type == tprob_discrete)
348 for (i=0; i < icounts.
length(); i++)
350 double prob = icounts.
a_no_check(i)/num_samples;
352 e += prob * log(prob);
357 for (p=scounts.
list.head(); p != 0; p=p->next())
359 double prob = scounts.
list(p).v/num_samples;
361 e += prob * log(prob);
372 if (type == tprob_discrete)
375 return scounts.
list.head();
380 if (type == tprob_discrete)
381 return (tprob_int(idx) >= icounts.
length());
388 if (type == tprob_discrete)
389 return (
EST_Litem *)(((
unsigned char *)idx)+1);
396 if (type == tprob_discrete)
397 return discrete->
name(tprob_int(idx));
399 return scounts.
list(idx).k;
404 if (type == tprob_discrete)
406 s = discrete->
name(tprob_int(idx));
407 freq = icounts(tprob_int(idx));
411 s = scounts.
list(idx).k;
412 freq = scounts.
list(idx).v;
418 if (type == tprob_discrete)
420 prob = probability(tprob_int(idx));
421 s = discrete->
name(tprob_int(idx));
425 s = scounts.
list(idx).k;
426 prob = (double)scounts.
list(idx).v/num_samples;
442 s <<
"(" << name <<
"=" << prob <<
") ";
446 << pd.
samples() <<
" sum=" << sum <<
")";