Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_lattice.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Simon King */
34 /* Date : November 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* Lattice/Finite State Network */
37 /* */
38 /*=======================================================================*/
39 
40 #include "EST_lattice.h"
41 #include <fstream>
42 #include <cstdlib>
43 
44 Lattice::Lattice()
45 {
46  tf=NULL;
47 }
48 
49 
50 Lattice::~Lattice()
51 {
52 
53 }
54 
56 Lattice::start_node()
57 {
58  if(nodes.head() != NULL )
59  return nodes(nodes.head());
60  else{
61  cerr << "LAttice has no nodes !" << endl;
62  return NULL;
63  }
64 
65 }
66 
67 #if 0
68 bool Lattice::build_qmap(Bigram &g, float error_margin)
69 {
70 
71  // very crude VQ (not vectors though)
72  // works best on log bigrams ?
73 
74  // to do : automatically determine error_margin
75 
76  int i,j;
77  EST_Litem *l_ptr;
78  bool flag;
79 
80  // first, build a list, then transfer to array
81  Tlist<float> list_qmap;
82 
83  qmap_error_margin = error_margin;
84 
85  for (i = 0; i < g.size(); ++i){
86  for (j = 0; j < g.size(); ++j){
87 
88  flag = false;
89  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
90  if(fabs(list_qmap(l_ptr) - g.p(i,j)) <= error_margin){
91  flag = true;
92  break;
93  }
94 
95  if(!flag)
96  list_qmap.append(g.p(i,j));
97 
98  }
99  }
100 
101  // special zero (within error_margin) entry, if not already there
102  flag = false;
103  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
104  if(fabs(list_qmap(l_ptr)) <= error_margin){
105  flag = true;
106  break;
107  }
108 
109  if(!flag)
110  list_qmap.append(0);
111 
112  qsort(list_qmap);
113 
114  i=0;
115  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
116  i++;
117 
118  // transfer to array
119  qmap.resize(i);
120  i=0;
121  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
122  qmap(i++) = list_qmap(l_ptr);
123 
124  list_qmap.clear();
125 
126 
127  cerr << "Built qmap with " << i << " entries" << endl;
128 
129  return true;
130 }
131 
132 
133 bool
134 Lattice::build_nmap(Bigram &g)
135 {
136 
137 
138  int j;
139  //name_map_entry_t entry;
140  EST_Litem *l_ptr;
141 
142  // first, build a list, then transfer to array
143  Tlist<EST_String> list_nmap;
144 
145  // wordlist must not have !ENTER of !EXIT in it
146  for (j = 0; j < g.size() - 2; ++j){
147  if(g.words(j) == "!ENTER"){
148  cerr << "Wordlist contains special word !ENTER" << endl;
149  return false;
150  }
151  if(g.words(j) == "!EXIT"){
152  cerr << "Wordlist contains special word !EXIT" << endl;
153  return false;
154  }
155  }
156 
157 
158 
159 
160  // add all words
161  for (j = 0; j < g.size() - 2; ++j)
162  list_nmap.append(g.words(j));
163 
164  // special enter and exit
165  list_nmap.append("!ENTER");
166  list_nmap.append("!EXIT");
167 
168  qsort(list_nmap);
169 
170  cerr << list_nmap << endl;
171 
172  j=0;
173  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
174  j++;
175 
176  // transfer to array
177  nmap.resize(j);
178  j=0;
179  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
180  nmap(j++) = list_nmap(l_ptr);
181 
182  list_nmap.clear();
183  cerr << "Built nmap with " << j << " entries" << endl;
184 
185  return true;
186 }
187 
188 
189 bool
190 Lattice::construct_alphabet(Bigram &g)
191 {
192 
193  int i,j,enteri,exiti,aindex,qindex,nindex,count;
194  symbol_t *sym;
195  EST_Litem *a_ptr;
196  bool flag;
197 
198  if(!build_qmap(g,1.0e-02) || !build_nmap(g)) // to do : fix
199  return false;
200 
201 
202  // temporary list
203  Tlist<symbol_t*> list_alphabet;
204 
205  // alphabet consists of all occurring combinations of
206  // nmap and qmap entries
207 
208  enteri = g.size() - 2;
209  exiti = g.size() - 1;
210 
211  aindex=0;
212 
213  // order is this way for speed
214  for (j = 0; j < g.size(); ++j){
215 
216  cerr << "constructing alphabet " << (int)((float)j*100/(float)g.size()) << "% \r";
217 
218  if(j == enteri)
219  nindex = nmap_name_to_index("!ENTER");
220  else if(j == exiti)
221  nindex = nmap_name_to_index("!EXIT");
222  else
223  nindex = nmap_name_to_index(g.words(j)); // check !!
224 
225  for (i = 0; i < g.size(); ++i){
226 
227  qindex = qmap_value_to_index(g.p(i,j));
228 
229  // have we got sym already ?
230  flag=false;
231  for(a_ptr=list_alphabet.tail();a_ptr!=NULL;a_ptr=a_ptr->prev()){
232  if( (list_alphabet(a_ptr)->nmap_index == nindex) &&
233  (list_alphabet(a_ptr)->qmap_index == qindex) ){
234  flag=true;
235  break;
236  }
237 
238  // since words are added in order, stop
239  // when we get to previous word
240  if(list_alphabet(a_ptr)->nmap_index != nindex)
241  break;
242  }
243 
244 
245  if(!flag){
246  sym = new symbol_t;
247  sym->qmap_index=qindex;
248  sym->nmap_index=nindex;
249  list_alphabet.append(sym);
250  }
251  }
252  }
253 
254  // special ENTER with prob of 1 symbol
255  nindex = nmap_name_to_index("!ENTER");
256  qindex = qmap_value_to_index(0); // log prob
257  flag=false;
258  for(a_ptr=list_alphabet.tail();a_ptr!=NULL;a_ptr=a_ptr->prev())
259  if( (list_alphabet(a_ptr)->nmap_index == nindex) &&
260  (list_alphabet(a_ptr)->qmap_index == qindex) ){
261  flag=true;
262  break;
263  }
264 
265  if(!flag){
266  sym = new symbol_t;
267  sym->qmap_index=qindex;
268  sym->nmap_index=nindex;
269  list_alphabet.append(sym);
270  }
271 
272  // and special no-label label (e-move)
273  sym = new symbol_t;
274  sym->qmap_index=-1;
275  sym->nmap_index=-1;
276  list_alphabet.append(sym);
277 
278  ptr_qsort(list_alphabet);
279 
280  count=0;
281  for(a_ptr=list_alphabet.head();a_ptr != NULL; a_ptr=a_ptr->next())
282  count++;
283 
284 
285  alphabet.resize(count);
286  count=0;
287  for(a_ptr=list_alphabet.head();a_ptr != NULL; a_ptr=a_ptr->next())
288  alphabet(count++) = *(list_alphabet(a_ptr));
289 
290  // .. to do - delete syms
291  list_alphabet.clear();
292 
293  e_move_symbol_index = alphabet_index_lookup(-1,-1);
294 
295  cerr << "Alphabet has " << count << " symbols " << endl;
296 
297  return true;
298 }
299 
300 
301 bool
302 Lattice::construct(Bigram &g)
303 {
304 
305  // every word becomes a node, every non-zero transition becomes an arc
306  // eliminate any transitions into ENTER and out of EXIT
307  int i,j,k;
308  EST_Litem *n_ptr,*a_ptr;
309  Node *n;
310  Arc *a;
311  Node *from,*to;
312  int symbol;
313  // unfortunate, but fixed in Bigram class
314  int enteri = g.size() - 2;
315  int exiti = g.size() - 1;
316  int from_name,to_name;
317 
318  // single entry node, no name since no arcs in
319  // single arc out to node called "!ENTER"
320 
321  Node *enter_node = new Node;
322  enter_node->name.append(-1); // no name !
323  nodes.append(enter_node);
324 
325  // make nodes - one for every entry in nmap
326  for(i=0; i<nmap.n(); i++){
327 
328  n = new Node;
329  n->name.append(i); // index into nmap
330  nodes.append(n);
331 
332  if(nmap(i) == "!ENTER"){ // temporary hack
333 
334  symbol = alphabet_index_lookup(i,qmap_value_to_index(0)); // for log probs !!!
335  a = new Arc;
336  a->label = symbol;
337  a->to = n;
338  enter_node->arcs_out.append(a);
339 
340  }
341 
342 
343  // keep note of final node
344  if(nmap(i) == "!EXIT") // temporary hack
345  final_nodes.append(n);
346 
347  }
348 
349  // make arcs
350  k=0;
351  for (i = 0; i < g.size(); ++i){
352  cerr << "building lattice " << (int)((float)i*100/(float)g.size()) << "% \r";
353  for (j = 0; j < g.size(); ++j){
354 
355  // ignore any transitions into ENTER or out of EXIT
356  //if((g.p(i, j) > 0) && - for probs only, can't do for log probs
357  if((j != enteri) && (i != exiti)){
358 
359  // find from and to nodes
360  from=NULL;
361  to=NULL;
362 
363  if(i == enteri)
364  from_name = nmap_name_to_index("!ENTER");
365  else
366  from_name = nmap_name_to_index(g.words(i));
367 
368  if(j == exiti)
369  to_name = nmap_name_to_index("!EXIT");
370  else
371  to_name= nmap_name_to_index(g.words(j));
372 
373  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
374  int name = nodes(n_ptr)->name(nodes(n_ptr)->name.head());
375  if(name == from_name)
376  from = nodes(n_ptr);
377  if(name == to_name)
378  to = nodes(n_ptr);
379  }
380 
381  if(from==NULL){
382  cerr << "Couldn't find 'from' node " << nmap_index_to_name(from_name) << endl;
383  return false;
384  }
385 
386  if(to==NULL){
387  cerr << "Couldn't find 'to' node " << nmap_index_to_name(to_name) << endl;
388  return false;
389  }
390 
391  // get arc symbol - speed up this fn !
392  symbol = alphabet_index_lookup(to_name,qmap_value_to_index(g.p(i,j)));
393  if(symbol < 0){
394  cerr << "Couldn't lookup symbol in alphabet !" << endl;
395  return false;
396  }
397 
398  a = new Arc;
399  a->label = symbol;
400  a->to = to;
401 
402  from->arcs_out.append(a);
403 
404 
405  }
406  }
407  }
408  cerr << " \r";
409 
410  int c1=0,c2=0,c3=0;
411  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
412  c1++;
413  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
414  c2++;
415  }
416 
417  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
418  c3++;
419 
420  cerr << "NFA has " << c1
421  << " nodes (" << c3 << " final)"
422  << " and " << c2 << " arcs" << endl;
423 
424  return true;
425 }
426 #endif
427 
428 bool Lattice::determinise()
429 {
430  // very simple, but potentially time/memory consuming
431 
432  // make a new lattice whose nodes are all permutations
433  // of the nodes in the old lattice
434  // BUT : only make the nodes which are actually required
435  // (otherwise number of new nodes = 2^(number of old nodes)
436 
437  // we will call the old lattice NFA and the new one DFA
438 
439  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*l_ptr;
440  Node *new_node;
441  EST_TList<Node*> new_nodes;
442  EST_TList<Node*> new_final_nodes;
443  EST_TList<Arc*> new_arcs;
444  EST_TList<Arc*> arc_list;
445  EST_TList<int> new_name;
446  int c,count,current_label,new_arcs_made;
447  bool is_final;
448 
449  // first, sort nodes' lists of arcs by label to make
450  // it easier
451  cerr << "sorting arc lists" << endl;
452  sort_arc_lists();
453 
454  cerr << "making new nodes" << endl;
455 
456  // - for every node in the NFA, look at the outgoing arcs with
457  // the same label
458  // - make a DFA node which is a combination of the NFA nodes
459  // which are the destinations of those arcs
460 
461  // there are more DFA nodes made later
462 
463  // enter node is special case, it has no in-going arcs
464  // so will not be made in the following loop
465 
466  // for now, assume one enter node at head of list ... hmmmmm
467  new_node = new Node;
468  if(new_node == NULL){
469  cerr << "Could not allocate any more memory";
470  return false;
471  }
472  new_node->name = nodes(nodes.head())->name;
473  new_nodes.append(new_node);
474 
475 
476 
477  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
478 
479  for (a_ptr = nodes(n_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
480 
481  current_label = nodes(n_ptr)->arcs_out(a_ptr)->label;
482 
483  // make list of arc destinations along arcs with this label
484  is_final=false;
485  new_name.clear();
486  new_name = nodes(n_ptr)->arcs_out(a_ptr)->to->name;
487  if(final(nodes(n_ptr)->arcs_out(a_ptr)->to) )
488  is_final=true;
489  while((a_ptr != NULL) &&
490  (a_ptr->next() != NULL) &&
491  (nodes(n_ptr)->arcs_out(a_ptr->next())->label == current_label)){
492  merge_sort_unique(new_name,nodes(n_ptr)->arcs_out(a_ptr->next())->to->name);
493 
494  if( !is_final && final(nodes(n_ptr)->arcs_out(a_ptr->next())->to) )
495  is_final=true;
496 
497  a_ptr=a_ptr->next();
498 
499  }
500 
501  // make new node with this name, unless we already have one
502  bool flag=false;
503  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
504  if( new_nodes(n2_ptr)->name == new_name ){
505  flag=true;
506  break;
507  }
508 
509  if(!flag){ // need to make new node
510 
511  new_node = new Node;
512  if(new_node == NULL){
513  cerr << "Could not allocate any more memory";
514  count=0;
515  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
516  count++;
517  cerr << " after making " << count << " nodes" << endl;
518  return false;
519  }
520 
521  new_node->name = new_name;
522  new_nodes.append(new_node);
523 
524  if(is_final)
525  new_final_nodes.append(new_node);
526  }
527 
528  } // loop round outgoing arcs for this node
529 
530  } // loop round NFA nodes
531 
532 
533  // now, for every node in the DFA, its 'transition function' (i.e. outgoing arcs)
534  // is the sum of the transition functions of its constituent nodes from
535  // the NFA
536 
537  c=0;
538  new_arcs_made=0;
539  for (n_ptr = new_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
540 
541  c++;
542  cerr << "Processing node " << c
543  << " arcs=" << new_arcs_made << " \r";
544 
545  // find constituent nodes in NFA (only works if NFA names are single ints !)
546  arc_list.clear();
547  for(l_ptr=new_nodes(n_ptr)->name.head(); l_ptr != 0; l_ptr = l_ptr->next()){
548 
549  for(n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
550 
551  if(nodes(n2_ptr)->name(nodes(n2_ptr)->name.head()) ==
552  new_nodes(n_ptr)->name(l_ptr))
553  arc_list += nodes(n2_ptr)->arcs_out;
554 
555 
556  }
557  }
558 
559  // now we have a list of all the NFA nodes we can 'get to'
560  // from this DFA node, sort by label and find the
561  // DFA node for each label
562 
563  // or rather, take the arc list from above, and collapse
564  // all arcs with the same label into one arc to the DFA
565  // node which is the equivalent of the NFA nodes at the
566  // end of those arcs
567 
568  sort_by_label(arc_list);
569 
570  for(a_ptr=arc_list.head();a_ptr!=NULL;a_ptr=a_ptr->next()){
571 
572  current_label=arc_list(a_ptr)->label;
573  //cerr << " label " << current_label;
574 
575  is_final=false;
576  EST_TList<int> new_name2;
577  new_name2 = arc_list(a_ptr)->to->name;
578  if(final(arc_list(a_ptr)->to) )
579  is_final=true;
580  while((a_ptr != NULL) &&
581  (a_ptr->next() != NULL) &&
582  (arc_list(a_ptr->next())->label == current_label)){
583  merge_sort_unique(new_name2,arc_list(a_ptr)->to->name);
584  if( !is_final && final(arc_list(a_ptr)->to) )
585  is_final=true;
586  a_ptr=a_ptr->next();
587  }
588 
589  //cerr << " -> " << new_name2;
590 
591  // find DFA node with that name
592  bool flag=false;
593  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
594  if( new_nodes(n2_ptr)->name == new_name2 ){
595  flag=true;
596  break;
597  }
598 
599  if(!flag){ // failed to make node previously, do it now
600  new_node = new Node;
601  if(new_node == NULL){
602  cerr << "Could not allocate any more memory";
603  count=0;
604  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
605  count++;
606  cerr << " after making " << count << " nodes" << endl;
607  return false;
608  }
609 
610  new_node->name = new_name2;
611  new_nodes.append(new_node);
612  link(new_nodes(n_ptr),new_node,current_label);
613 
614  if(is_final)
615  new_final_nodes.append(new_node);
616 
617  }else{
618 
619  link(new_nodes(n_ptr),new_nodes(n2_ptr),current_label);
620 
621  }
622 
623  new_arcs_made++;
624  }
625 
626 
627  } // loop round DFA nodes
628 
629  cerr << endl;
630 
631  // delete old nodes, and replace with new nodes
632  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
633  delete nodes(n_ptr);
634  nodes.clear();
635  nodes=new_nodes;
636  new_nodes.clear();
637 
638  final_nodes.clear();
639  final_nodes=new_final_nodes;
640  new_final_nodes.clear();
641 
642  int c1=0,c2=0,c3=0;
643  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
644  c1++;
645  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
646  c2++;
647  }
648 
649  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
650  c3++;
651 
652  cerr << "DFA has " << c1
653  << " nodes (" << c3 << " final)"
654  << " and " << c2 << " arcs" << endl;
655 
656 
657  return true;
658 }
659 
660 bool
661 Lattice::link(Node *n1, Node *n2, int label)
662 {
663 
664  //if(l==NULL)
665  //l=&arcs;
666 
667  // create arc between two nodes
668  // in direction n1,n2
669  Arc *new_arc;
670 
671  if( (n1==NULL) || (n2==NULL) ){
672  cerr << "Can't link null nodes" << endl;
673  return false;
674  }
675 
676 
677 
678  new_arc = new Arc;
679  //new_arc->from = n1;
680  new_arc->to = n2;
681  new_arc->label = label;
682 
683  n1->arcs_out.append(new_arc);
684  //n2->arcs_in.append(new_arc);
685  //l->append(new_arc);
686 
687  return true;
688 }
689 
690 void
691 Lattice::sort_arc_lists()
692 {
693  EST_Litem *n_ptr;
694 
695  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
696 
697  // can't sort nodes(n_ptr)->arcs_out directly
698  // since it is a list of pointers
699 
700  sort_by_label(nodes(n_ptr)->arcs_out);
701 
702  }
703 
704 }
705 
706 void
707 sort_by_label(EST_TList<Lattice::Arc*> &l){
708  ptr_qsort(l);
709 }
710 
711 
712 bool
713 Lattice::build_distinguished_state_table(bool ** &dst)
714 {
715 
716  int i,j;
717  EST_Litem *n_ptr,*n2_ptr;
718 
719  int num_nodes = nodes.length();
720  // not every element will be used
721  dst = new bool*[num_nodes];
722  if(dst==NULL){
723  cerr << "Not enough memory" << endl;
724  return false;
725  }
726  for(i=0;i<num_nodes;i++){
727  dst[i] = new bool[num_nodes];
728  if(dst[i]==NULL){
729  cerr << "Not enough memory" << endl;
730  return false;
731  }
732  for(j=0;j<num_nodes;j++)
733  dst[i][j] = false;
734 
735  }
736 
737  cerr << "final/non-final scan";
738  // any final/non-final (or non-final/final) pairs are distinguished immediately
739  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
740  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
741  if(final(nodes(n_ptr)) && !final(nodes(n2_ptr)))
742  dst[i][j] = true;
743  else if(!final(nodes(n_ptr)) && final(nodes(n2_ptr)))
744  dst[i][j] = true;
745 
746  }
747  }
748  cerr << "\r \r";
749 
750  // two possible methods, depending on network representation
751  // for sparse networks, use actual nodes and their lists of arcs
752  //build_distinguished_state_table_direct(dst);
753 
754  // for dense networks, use a transition function matrix
755  // which will be faster but more memory consuming
756 
757  if(!build_transition_function()){
758  cerr << "Couldn't build transition function" << endl;
759  return false;
760  }
761 
762  if(!build_distinguished_state_table_from_transition_function(dst)){
763  cerr << "Couldn't build dst from transition function" << endl;
764  return false;
765  }
766 
767  // delete tf, since it will be wrong for minimised net
768  for(i=0;i<num_nodes;i++)
769  delete [] tf[i];
770  delete [] tf;
771  tf=NULL;
772 
773  return true;
774 }
775 
776 bool
777 Lattice::build_distinguished_state_table_direct(bool ** &dst)
778 {
779 
780  int i,j,i1,j1,scan_count,this_symbol;
781  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*s_ptr;
782  bool flag,flag2;
783 
784  // carry out repeated scans of the distinguished state table until no more
785  // cells are set true
786 
787  flag=true;
788  scan_count=0;
789  while(flag){
790  flag=false;
791 
792  scan_count++;
793 
794  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
795  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
796  cerr << "scan " << scan_count << " : " << i << "," << j << " \r";
797 
798  if(!dst[i][j]){
799 
800  // go through alphabet, or rather just those symbols
801  // occurring on the arcs out of this pair of nodes
802 
803  flag2=true;
804  s_ptr=nodes(n_ptr)->arcs_out.head();
805  while(s_ptr != NULL){
806 
807 
808  if(flag2){ // we are going through symbols on arcs out of 'nodes(n_ptr)'
809  this_symbol=nodes(n_ptr)->arcs_out(s_ptr)->label;
810  i1 = node_index(nodes(n_ptr)->arcs_out(s_ptr)->to);
811 
812  j1=-1;
813  for(a_ptr=nodes(n2_ptr)->arcs_out.head();
814  a_ptr!=NULL; a_ptr=a_ptr->next())
815  if(nodes(n2_ptr)->arcs_out(a_ptr)->label == this_symbol)
816  j1 = node_index(nodes(n2_ptr)->arcs_out(a_ptr)->to);
817 
818  }else{ // we are going through symbols on arcs out of 'nodes(n2_ptr)'
819  this_symbol=nodes(n2_ptr)->arcs_out(s_ptr)->label;
820  j1 = node_index(nodes(n2_ptr)->arcs_out(s_ptr)->to);
821 
822  i1=-1;
823  for(a_ptr=nodes(n_ptr)->arcs_out.head();
824  a_ptr!=NULL; a_ptr=a_ptr->next())
825  if(nodes(n_ptr)->arcs_out(a_ptr)->label == this_symbol)
826  i1 = node_index(nodes(n_ptr)->arcs_out(a_ptr)->to);
827  }
828 
829 
830  // States (nodes) are distinguished if, for any symbol, they
831  // have transitions (arcs) to a pair of distinguished states.
832  // This includes the case where, for any symbol, one state
833  // has a transition and the other does not
834 
835  if( ( (i1>=0) && (j1>=0) && dst[i1][j1]) ||
836  ( (i1>=0) && (j1<0) ) ||
837  ( (j1>=0) && (i1<0) ) )
838  {
839  dst[i][j] = true;
840  flag=true;
841  break; // out of s_ptr loop
842 
843  }
844 
845  s_ptr=s_ptr->next();
846  if( (s_ptr==NULL) && (flag2) ){ // now go down second list
847  s_ptr=nodes(n2_ptr)->arcs_out.head();
848  flag2=false;
849  }
850 
851  }
852  }
853  }
854  }
855  }
856 
857  return true;
858 }
859 
860 bool
861 Lattice::build_distinguished_state_table_from_transition_function(bool ** &dst)
862 {
863  int i,j,i2,j2,k,scan_count;
864  int num_nodes = nodes.length();
865  int num_symbols = alphabet.n();
866  bool flag;
867 
868  flag=true;
869  scan_count=0;
870  while(flag){
871  flag=false;
872 
873  scan_count++;
874 
875  for(i=0;i<num_nodes-1;i++){
876 
877  cerr << "scan " << scan_count << " : row "
878  << i << " \r";
879 
880  for(j=i+1;j<num_nodes;j++){
881 
882  if( !dst[i][j] ){
883 
884  for(k=0;k<num_symbols;k++){
885 
886  i2 = tf[i][k];
887  j2 = tf[j][k];
888 
889  if((i2<0) && (j2>=0)){
890  dst[i][j] = true;
891  flag=true;
892  break;
893 
894  }else if((j2<0) && (i2>=0)){
895  dst[i][j] = true;
896  flag=true;
897  break;
898 
899  }else if( (i2>0) && (j2>0) && dst[i2][j2] ){
900  dst[i][j] = true;
901  flag=true;
902  break;
903  }
904 
905  }
906  }
907  }
908  }
909  }
910 
911  return true;
912 }
913 
914 bool
915 Lattice::minimise()
916 {
917 
918  // method, make distinguished state table
919  // scan all pairs of states (a.k.a. nodes)
920 
921 
922  int i,j;
923  EST_Litem *n_ptr,*n2_ptr,*n3_ptr,*a_ptr;
924  int num_nodes = nodes.length();
925  bool **dst = NULL; // distinguished state table
926  bool flag;
927 
928  if(!build_distinguished_state_table(dst)){
929  cerr << "Couldn't build distinguished state table" << endl;
930  return false;
931  }
932 
933  int count=0,count2=0;
934  for(i=0;i<num_nodes-1;i++)
935  for(j=i+1;j<num_nodes;j++)
936  if(!dst[i][j])
937  count++;
938  else
939  count2++;
940 
941  cerr << "There are " << count << " undistinguished pairs of nodes and "
942  << count2 << " distinguished pairs" << endl;
943 
944 
945  // make lists of nodes to be merged
946  EST_TList<Node *> merge_list;
947 
948 
949 
950  flag=true;
951  while(flag){
952  flag=false;
953 
954  merge_list.clear();
955  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
956 
957  cerr << "merge, processing row " << i << " \r";
958 
959  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
960 
961  if(!dst[i][j]){
962 
963  // is the merge list empty ?
964  if(merge_list.head() == NULL){
965  // put this pair of nodes on merge list
966  merge_list.append(nodes(n_ptr));
967  merge_list.append(nodes(n2_ptr));
968  dst[i][j] = true;
969 
970  }else{ // merge list already has some items on it
971 
972  // see if either of this pair of nodes is on the merge list,
973  // and if so add the other node in the pair
974  bool add1=false,add2=false;
975  for(n3_ptr=merge_list.head();n3_ptr!=NULL;n3_ptr=n3_ptr->next()){
976  if(merge_list(n3_ptr) == nodes(n_ptr))
977  add2=true;
978  if(merge_list(n3_ptr) == nodes(n2_ptr))
979  add1=true;
980  }
981 
982 
983  if(add1 && !add2){
984  merge_list.append(nodes(n_ptr));
985  dst[i][j] = true;
986  }else if(add2 && !add1){
987  merge_list.append(nodes(n2_ptr));
988  dst[i][j] = true;
989  }
990 
991 
992  }
993  }
994  }
995  }
996 
997  // anything on the merge list ?
998  if(merge_list.head() != NULL){
999 
1000  // so merge them
1001  count=0;
1002  for(n_ptr=merge_list.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1003  count++;
1004  cerr << "merging " << count << " nodes out of ";
1005 
1006  count=0;
1007  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1008  count++;
1009  cerr << count;
1010 
1011  merge_nodes(merge_list);
1012  flag=true;
1013 
1014  count=0;
1015  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1016  count++;
1017  cerr << " leaving " << count << endl;
1018 
1019 
1020  }
1021 
1022 
1023  }
1024 
1025  int c1=0,c2=0;
1026  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1027  c1++;
1028  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1029  c2++;
1030  }
1031 
1032  cerr << "Minimum state DFA has " << c1
1033  << " nodes and " << c2 << " arcs " << endl;
1034 
1035  merge_arcs();
1036 
1037  c1=0,c2=0;
1038  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1039  c1++;
1040  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1041  c2++;
1042  }
1043 
1044  cerr << "Pruned minimum state DFA has " << c1
1045  << " nodes and " << c2 << " arcs" << endl;
1046 
1047  for(i=0;i<num_nodes;i++)
1048  delete [] dst[i];
1049  delete [] dst;
1050 
1051  return true;
1052 }
1053 
1054 
1055 void
1056 Lattice::merge_nodes(EST_TList<Node*> &l)
1057 {
1058 
1059  if(l.head() == NULL)
1060  return;
1061 
1062  // make a new node with all node labels of old nodes
1063  // and all arcs of old nodes
1064 
1065  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*a2_ptr;
1066  Node *new_node;
1067  new_node = new Node;
1068 
1069 
1070  // to do .. deal with final_nodes list too ....
1071 
1072 
1073 
1074  for(n_ptr=l.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1075 
1076  // get arcs
1077  for(a_ptr=l(n_ptr)->arcs_out.head();a_ptr!=NULL;a_ptr=a_ptr->next())
1078  new_node->arcs_out.append(l(n_ptr)->arcs_out(a_ptr));
1079 
1080  // get name
1081  merge_sort_unique(new_node->name,l(n_ptr)->name);
1082 
1083  // find arcs into old nodes and make them go into new node
1084  for(n2_ptr=nodes.head();n2_ptr!=NULL;n2_ptr=n2_ptr->next()){
1085  for(a2_ptr=nodes(n2_ptr)->arcs_out.head();a2_ptr!=NULL;a2_ptr=a2_ptr->next()){
1086  if(nodes(n2_ptr)->arcs_out(a2_ptr)->to == l(n_ptr))
1087  nodes(n2_ptr)->arcs_out(a2_ptr)->to = new_node;
1088  }
1089  }
1090 
1091  }
1092 
1093 
1094  // delete old nodes, but not arcs
1095  for(n_ptr=l.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1096  for(n2_ptr=nodes.head();n2_ptr!=NULL;n2_ptr=n2_ptr->next())
1097  if(nodes(n2_ptr) == l(n_ptr)){
1098  nodes(n2_ptr)->name.clear();
1099  nodes(n2_ptr)->arcs_out.clear();
1100  delete nodes(n2_ptr);
1101  nodes.remove(n2_ptr);
1102  }
1103  }
1104 
1105  nodes.append(new_node);
1106 
1107 }
1108 
1109 void
1110 Lattice::merge_arcs()
1111 {
1112 
1113  EST_Litem *n_ptr,*a_ptr,*a2_ptr;
1114  EST_TList<Arc*> merge_list;
1115  int count=0,count2;
1116 
1117  // find repeated arcs with the same label between two nodes
1118  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1119 
1120  count2=0;
1121  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1122  count2++;
1123 
1124  cerr << "merging arcs from node " << ++count
1125  << ", before:" << count2;
1126 
1127  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr->next()!=NULL; a_ptr=a_ptr->next()){
1128 
1129  merge_list.clear();
1130  for(a2_ptr=a_ptr->next();a2_ptr!=NULL; a2_ptr=a2_ptr->next())
1131 
1132  if((nodes(n_ptr)->arcs_out(a_ptr)->label ==
1133  nodes(n_ptr)->arcs_out(a2_ptr)->label) &&
1134 
1135  (nodes(n_ptr)->arcs_out(a_ptr)->to ==
1136  nodes(n_ptr)->arcs_out(a2_ptr)->to) ){
1137 
1138  delete nodes(n_ptr)->arcs_out(a2_ptr);
1139  a2_ptr=nodes(n_ptr)->arcs_out.remove(a2_ptr);
1140 
1141  }
1142 
1143  }
1144 
1145  count2=0;
1146  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1147  count2++;
1148 
1149  cerr<< ", after:" << count2 << endl;
1150 
1151  }
1152 
1153  cerr << " \r" << endl;
1154 
1155 }
1156 
1157 
1158 void
1159 Lattice::prune_arcs(Node *node, EST_TList<Arc*> arcs)
1160 {
1161 
1162  int count=0;
1163  EST_Litem *a_ptr;
1164  for(a_ptr=arcs.head(); a_ptr != 0; a_ptr=a_ptr->next() ){
1165  prune_arc(node, arcs(a_ptr));
1166  count++;
1167  }
1168  //cerr << "pruned " << count << " arcs" << endl;
1169 }
1170 
1171 
1172 void
1173 Lattice::prune_arc(Node *node, Arc *arc)
1174 {
1175  remove_arc_from_nodes_out_list(node,arc);
1176  delete arc;
1177 }
1178 
1179 /*
1180 void
1181 Lattice::remove_arc_from_nodes_in_list(Node *n, Arc *a)
1182 {
1183  EST_Litem *a_ptr;
1184  for (a_ptr = n->arcs_in.head(); a_ptr != 0; a_ptr = a_ptr->next())
1185  if(n->arcs_in(a_ptr) == a)
1186  n->arcs_in.remove(a_ptr);
1187 }
1188 */
1189 
1190 void
1191 Lattice::remove_arc_from_nodes_out_list(Node *n, Arc *a)
1192 {
1193  EST_Litem *a_ptr;
1194  for (a_ptr = n->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next())
1195  if(n->arcs_out(a_ptr) == a)
1196  n->arcs_out.remove(a_ptr);
1197 }
1198 
1199 /* not used
1200 bool
1201 Lattice::is_enter_node(Node *n)
1202 {
1203  // contains "!ENTER" in its list of names
1204  EST_Litem *l_ptr;
1205  for(l_ptr=n->name.head(); l_ptr != 0; l_ptr=l_ptr->next())
1206  if(n->name(l_ptr) == nmap_name_to_index("!ENTER")) // temporary !!
1207  return true;
1208  return false;
1209 }
1210 */
1211 
1212 /* Superseded now we have list of final nodes
1213 bool
1214 Lattice::is_exit_node(Node *n)
1215 {
1216  EST_Litem *l_ptr;
1217  for(l_ptr=n->name.head(); l_ptr != 0; l_ptr=l_ptr->next())
1218  if(n->name(l_ptr) == nmap_name_to_index("!EXIT")) // temporary !!
1219  return true;
1220  return false;
1221 }
1222 */
1223 
1224 EST_String
1225 Lattice::nmap_index_to_name(int index)
1226 {
1227  if(index < nmap.n())
1228  return nmap(index);
1229  else{
1230  cerr << "Warning : nmap index " << index << " out of range" << endl;
1231  return EST_String("!error!");
1232  }
1233 
1234 }
1235 
1236 int
1237 Lattice::nmap_name_to_index(const EST_String &name)
1238 {
1239  int i,j,mid;
1240 
1241  // binary search
1242  i=0;
1243  j=nmap.n()-1;
1244 
1245  while(true){
1246 
1247  mid = (i+j)/2;
1248 
1249  if(name > nmap(mid))
1250  i = mid;
1251 
1252  else if(name < nmap(mid))
1253  j = mid;
1254 
1255  else{ // name == nmap(mid)
1256  return mid; // lucky strike
1257  }
1258 
1259  if(i==j){
1260  if(name == nmap(i))
1261  return i;
1262  else{
1263  cerr << "Lattice::nmap_name_to_index failed for '"
1264  << name << "'" << endl;
1265  return -1;
1266  }
1267 
1268  }else if(j==i+1){
1269 
1270  if(name == nmap(i))
1271  return i;
1272  else if(name == nmap(j))
1273  return j;
1274  else{
1275  cerr << "Lattice::nmap_name_to_index failed for '"
1276  << name << "'" << endl;
1277  return -1;
1278  }
1279 
1280  }
1281 
1282  }
1283 
1284  return -1;
1285 }
1286 
1287 
1288 float
1289 Lattice::qmap_index_to_value(int index)
1290 {
1291  if(index < qmap.n())
1292  return qmap(index);
1293  else{
1294  cerr << "Warning : qmap index " << index << " out of range" << endl;
1295  return 1;
1296  }
1297 }
1298 
1299 
1300 int
1301 Lattice::qmap_value_to_index(float value)
1302 {
1303  int i,j,mid;
1304 
1305  // binary search
1306  i=0;
1307  j=qmap.n()-1;
1308 
1309  while(true){
1310 
1311  mid = (i+j)/2;
1312 
1313  if(value > qmap(mid))
1314  i = mid;
1315 
1316  else if(value < qmap(mid))
1317  j = mid;
1318 
1319  else
1320  return mid; // lucky strike
1321 
1322  if(i==j)
1323  return i;
1324 
1325  else if(j==i+1){
1326 
1327  if( fabs(qmap(i) - value) < fabs(qmap(j) - value) )
1328  return i;
1329  else
1330  return j;
1331  }
1332 
1333  }
1334  return 0;
1335 }
1336 
1337 int
1338 Lattice::node_index(Node *n)
1339 {
1340  EST_Litem *n_ptr;
1341  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
1342  if(nodes(n_ptr) == n)
1343  return nodes.index(n_ptr);
1344 
1345  }
1346 
1347  //cerr << "Lattice::node_index(Node *n) couldn't find index of node " << n->name;
1348 
1349  return -1;
1350 }
1351 
1352 
1353 
1354 
1355 bool
1356 Lattice::expand()
1357 {
1358 
1359  // keep HTK happy - it can't handle multiple arcs into a node
1360  // with different word labels
1361 
1362 
1363  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*w_ptr;
1364  int word;
1365  EST_TList<int> word_list;
1366  Node *new_node;
1367  Arc *new_arc;
1368  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
1369 
1370  // find all arcs into this node
1371  word_list.clear();
1372  for (n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
1373 
1374  for (a_ptr = nodes(n2_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next())
1375  if(nodes(n2_ptr)->arcs_out(a_ptr)->to == nodes(n_ptr)){
1376 
1377  if(nodes(n2_ptr)->arcs_out(a_ptr)->label != e_move_symbol_index){
1378  word = alphabet_index_to_symbol(nodes(n2_ptr)->arcs_out(a_ptr)->label)->nmap_index;
1379  word_list.append(word);
1380  sort_unique(word_list);
1381 
1382  }
1383  }
1384  }
1385 
1386 
1387  // if word_list.head() is NULL we should be worried
1388  if((word_list.head() != NULL) && word_list.head()->next() != NULL){
1389 
1390  // for each word make a null node, change all offending arcs to point
1391  // to it, and make another link from null node to nodes(n_ptr)
1392 
1393  for(w_ptr=word_list.head();w_ptr!=NULL;w_ptr=w_ptr->next()){
1394 
1395  // make null node
1396  new_node = new Node;
1397  new_arc = new Arc;
1398  new_arc->label = e_move_symbol_index; // no label
1399  new_arc->to = nodes(n_ptr);
1400  new_node->arcs_out.append(new_arc);
1401 
1402  // for every arc into nodes(n_ptr) with this word label, change its arcs
1403  for (n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
1404 
1405  for (a_ptr = nodes(n2_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1406  if(nodes(n2_ptr)->arcs_out(a_ptr)->to == nodes(n_ptr)){
1407 
1408  word = alphabet_index_to_symbol(nodes(n2_ptr)->arcs_out(a_ptr)->label)->nmap_index;
1409  if(word == word_list(w_ptr)){
1410 
1411  // change the arc
1412  nodes(n2_ptr)->arcs_out(a_ptr)->to = new_node;
1413  }
1414 
1415  }
1416  }
1417  }
1418  nodes.append(new_node);
1419  }
1420  }
1421  }
1422 
1423  // need to make sure ENTER node has no arcs in - if so add a dummy ENTER node
1424 
1425  //Node *enter_node = nodes(nodes.head());
1426  bool flag=false;
1427  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1428  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next()){
1429  flag=true;
1430  break;
1431  }
1432  }
1433 
1434 /*
1435  if(flag){
1436  cerr << " fixing ENTER node" << endl;
1437  new_node = new Node;
1438  new_arc = new Arc;
1439  new_arc->label = enter_node->label;
1440  new_arc->to = enter_node;
1441  new_node->arcs_out.append(new_arc);
1442  nodes.append(new_node);
1443  }
1444  */
1445 
1446  // also need to make sure there is only one EXIT node
1447  if(final_nodes.length() > 1){
1448  cerr << " making single EXIT node" << endl;
1449 
1450  // make null node
1451  new_node = new Node;
1452 
1453  for(n_ptr=final_nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1454 
1455  new_arc = new Arc;
1456  new_arc->label = e_move_symbol_index; // no label
1457  new_arc->to = final_nodes(n_ptr);
1458  final_nodes(n_ptr)->arcs_out.append(new_arc);
1459 
1460 
1461  }
1462 
1463  // empty final node list (nodes themselves remain on nodes list)
1464  final_nodes.clear();
1465 
1466  // now a single final node
1467  nodes.append(new_node);
1468  final_nodes.append(new_node);
1469  }
1470 
1471 
1472  int c1=0,c2=0;
1473  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1474  c1++;
1475  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1476  c2++;
1477  }
1478 
1479  cerr << "HTKified DFA has " << c1
1480  << " nodes and " << c2 << " arcs" << endl;
1481 
1482  return true;
1483 }
1484 
1485 bool
1486 Lattice::final(Node *n)
1487 {
1488  EST_Litem *n_ptr;
1489  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
1490  if(final_nodes(n_ptr) == n)
1491  return true;
1492 
1493  return false;
1494 }
1495 
1496 
1497 
1498 EST_String Lattice::name_as_string(EST_IList &l)
1499 {
1500  EST_String name;
1501  EST_Litem *l_ptr;
1502  for(l_ptr=l.head();l_ptr!=NULL;l_ptr=l_ptr->next())
1503  name+=nmap_index_to_name(l(l_ptr)) + ",";
1504 
1505  return name;
1506 }
1507 
1508 
1509 
1510 int
1511 Lattice::alphabet_index_lookup(int nmap_index, int qmap_index)
1512 {
1513  symbol_t sym;
1514  sym.nmap_index = nmap_index;
1515  sym.qmap_index = qmap_index;
1516 
1517  return alphabet_symbol_to_index(&sym);
1518 }
1519 
1520 
1522 Lattice::alphabet_index_to_symbol(int index)
1523 {
1524  if(index < alphabet.n())
1525  return &(alphabet[index]);
1526  else{
1527  cerr << "Warning : alphabet index " << index << " out of range" << endl;
1528  return NULL;
1529  }
1530 
1531 }
1532 
1533 int
1534 Lattice::alphabet_symbol_to_index(Lattice::symbol_t *sym)
1535 {
1536 
1537  int i,j,mid;
1538 
1539  // binary search
1540  i=0;
1541  j=alphabet.n()-1;
1542 
1543  while(true){
1544 
1545  mid = (i+j)/2;
1546 
1547  if(*sym > alphabet(mid))
1548  i = mid;
1549 
1550  else if(*sym < alphabet(mid))
1551  j = mid;
1552 
1553  else // *sym == alphabet(mid)
1554  return mid; // lucky strike
1555 
1556  if(i==j){
1557  if(*sym == alphabet(i))
1558  return i;
1559  else{
1560  cerr << "Lattice::alphabet_symbol_to_index failed for '"
1561  << *sym << "' 1" << endl;
1562  return -1;
1563  }
1564 
1565  }else if(j==i+1){
1566 
1567  if(*sym == alphabet(i))
1568  return i;
1569  else if(*sym == alphabet(j))
1570  return j;
1571  else{
1572  cerr << "Lattice::alphabet_symbol_to_index failed for '"
1573  << *sym << "' 2" << endl;
1574 
1575  cerr << i << " " << alphabet(i) << endl;
1576  cerr << j << " " << alphabet(j) << endl;
1577 
1578  return -1;
1579  }
1580 
1581  }
1582 
1583  }
1584 
1585  return -1;
1586 
1587 }
1588 
1589 
1590 bool
1591 Lattice::build_transition_function()
1592 {
1593 
1594  // different representation of the network , currently only for DFAs
1595  // since for a NFA each tf cell would be a list
1596 
1597  int i,j;
1598  EST_Litem *n_ptr,*a_ptr;
1599  int num_nodes = nodes.length();
1600  int num_symbols = alphabet.n();
1601 
1602  if(tf != NULL)
1603  cerr << "Warning : discarding existing transition function" << endl;
1604 
1605 
1606  tf = new int*[num_nodes];
1607  for(i=0;i<num_nodes;i++)
1608  tf[i] = new int[num_symbols];
1609 
1610  if(tf == NULL){
1611  cerr << "Not enough memory to build transition function"
1612  << "(needed " << num_nodes*num_symbols*sizeof(int) << " bytes)" << endl;
1613  return false;
1614  }
1615 
1616  for(i=0,n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next(),i++){
1617 
1618  cerr << "building transition function " << (int)((float)(i+1)*100/(float)num_nodes) << "% \r";
1619 
1620  for(j=0;j<alphabet.n();j++){
1621 
1622  tf[i][j]=-1; // means no transition
1623  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL;a_ptr=a_ptr->next()){
1624 
1625  if(j == nodes(n_ptr)->arcs_out(a_ptr)->label){
1626  tf[i][j] = node_index(nodes(n_ptr)->arcs_out(a_ptr)->to);
1627  break;
1628  }
1629  }
1630  }
1631  }
1632 
1633  cerr << endl;
1634  return true;
1635 }
1636 
1637 
1638 
1639 bool
1640 Lattice::accepts(EST_TList<symbol_t*> &string)
1641 {
1642  (void) string;
1643  return false;
1644 }
1645 
1646 
1647 float
1648 Lattice::viterbi_transduce(EST_TList<EST_String> &input,
1649  EST_TList<Arc*> &path,
1650  EST_Litem *current_symbol,
1651  Node *start_node)
1652 {
1653 
1654  // finds maximum sum-of-probs path
1655  EST_Litem *a_ptr,*best;
1656 
1657  if(start_node == NULL){
1658  start_node = nodes(nodes.head());
1659  current_symbol = input.head();
1660  path.clear();
1661  }
1662 
1663  if(current_symbol == NULL){ // consumed all input
1664  if( final(start_node) )
1665  return 0; // log prob 1
1666 
1667  else
1668  return -10000000; // hack for now
1669 
1670  }
1671 
1672  best=NULL;
1673  float max=-10000000; // hack for now
1674  for (a_ptr = start_node->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1675 
1676  if( alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->nmap_index
1677  == nmap_name_to_index(input(current_symbol)) ){
1678 
1679  float x = viterbi_transduce(input,
1680  path,
1681  current_symbol->next(),
1682  start_node->arcs_out(a_ptr)->to)
1683  + qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->qmap_index);
1684 
1685  if(x > max){
1686  max = x;
1687  best = a_ptr;
1688  }
1689  }
1690  }
1691 
1692  if(best != NULL)
1693  path.append(start_node->arcs_out(best));
1694 
1695  return max;
1696 
1697 }
1698 
1699 
1700 float Lattice::viterbi_transduce(EST_Track &observations,
1701  EST_TList<Arc*> &path,
1702  float &score,
1703  int current_frame,
1704  Node *start_node)
1705 {
1706  // finds maximum sum-of-probs path
1707  EST_Litem *a_ptr,*best;
1708 
1709  if(start_node == NULL){
1710  start_node = nodes(nodes.head());
1711  current_frame = 0;
1712  path.clear();
1713  score = 0.0;
1714  }
1715 
1716  if(current_frame == observations.num_frames()){ // consumed all input
1717  if( final(start_node) )
1718  return 0; // log prob 1
1719 
1720  else
1721  return -10000000; // hack for now
1722 
1723  }
1724 
1725 
1726  if(score < -100000){ // hack !
1727  return -10000000;
1728  }
1729 
1730  best=NULL;
1731  float max=-10000000; // hack for now
1732  for (a_ptr = start_node->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1733 
1734  int observation_element =
1735  alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->nmap_index
1736  - 2; // HACK !!@!!!! to skip ENTER/EXIT
1737 
1738  float this_observation = observations.a(current_frame,observation_element);
1739 
1740  //cerr << "frame " << current_frame << "," << observation_element
1741  //<< " : " << this_observation << endl;
1742 
1743  float x = viterbi_transduce(observations,
1744  path,
1745  score,
1746  current_frame+1,
1747  start_node->arcs_out(a_ptr)->to)
1748 
1749  + qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->qmap_index)
1750 
1751  + this_observation;
1752 
1753  if(x > max){
1754  max = x;
1755  best = a_ptr;
1756  }
1757 
1758  }
1759 
1760  if(best != NULL){
1761  path.append(start_node->arcs_out(best));
1762 
1763  int observation_element =
1764  alphabet_index_to_symbol(start_node->arcs_out(best)->label)->nmap_index;
1765 
1766  float this_observation = observations.a(current_frame,observation_element);
1767 
1768  score += qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(best)->label)->qmap_index) + this_observation;
1769 ;
1770 
1771  }
1772 
1773  cerr << max << endl;
1774 
1775  return max;
1776 
1777 }
1778 
1779 // hack for now (required for SHARED=2)
1780 bool Lattice::symbol_t::operator!=(Lattice::symbol_t const &) const {
1781 return false;
1782 }
1783 
1784 
1785 
1786