Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_lattice_io.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Simon King */
34 /* Date : November 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* Lattice/Finite State Network i/o functions */
37 /* */
38 /*=======================================================================*/
39 
40 #include <fstream>
41 #include <cstdlib>
42 #include "EST_lattice.h"
43 #include "EST_types.h"
44 #include "EST_Token.h"
45 #include "EST_StringTrie.h"
46 
47 bool save(Lattice &lattice, EST_String filename)
48 {
49  ostream *outf;
50  EST_Litem *n_ptr, *a_ptr;
51  int acount=0,ncount=0;
52  int i,from,to;
53  Lattice::symbol_t *symbol;
54  EST_String word;
55 
56  if (filename == "-")
57  outf = &cout;
58  else
59  outf = new ofstream(filename);
60 
61  if (!(*outf))
62  {
63  cerr << "lattice save: can't open lattice output file \""
64  << filename << "\"" << endl;
65  return false;
66  }
67 
68  // count
69  for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
70  ncount++;
71  for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
72  a_ptr != 0; a_ptr = a_ptr->next())
73  acount++;
74  }
75 
76  // size line (duh!)
77  *outf << "# " << "Generated by Edinburgh Speech Tools" << endl << "#" << endl;
78  *outf << "# Header" << endl;
79  *outf << "VERSION=1.1" << endl << "#" << endl;
80  *outf << "# Size line" << endl;
81  *outf << "N=" << ncount << " L=" << acount << endl;
82  *outf << "#" << endl;
83 
84 
85  // to do : for HTK name nodes, not arcs
86  // since all arcs in have same label
87 
88  // nodes
89  for(i=0;i<=1;i++){
90 
91  if(i==0)
92  *outf << "# Nodes" << endl;
93  else
94  *outf << "# Arcs" << endl;
95 
96  ncount=0;
97  acount=0;
98 
99  for (n_ptr = lattice.nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
100 
101  from=lattice.node_index(lattice.nodes(n_ptr));
102 
103  if(i==0){
104  *outf << "I=" << from << endl;
105 
106  }else
107  for (a_ptr = lattice.nodes(n_ptr)->arcs_out.head();
108  a_ptr != 0; a_ptr = a_ptr->next()){
109 
110  to = lattice.node_index(lattice.nodes(n_ptr)->arcs_out(a_ptr)->to);
111 
112  symbol = lattice.alphabet_index_to_symbol(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label);
113 
114  if(lattice.nodes(n_ptr)->arcs_out(a_ptr)->label == lattice.e_move_symbol_index){
115  *outf << "J=" << acount++ << " S=" << from << " E=" << to
116  << " W=!NULL" << endl;
117 
118  }else{
119  *outf << "J=" << acount++ << " S=" << from << " E=" << to
120  << " l=" << lattice.qmap_index_to_value(symbol->qmap_index)
121  << " W=" << lattice.nmap_index_to_name(symbol->nmap_index)
122  << endl;
123  }
124  }
125  }
126  }
127 
128  return true;
129 }
130 
131 bool
132 load(Lattice &lattice,EST_String filename)
133 {
134 
135  EST_String name, next_token, str;
136  EST_TokenStream ts;
137  EST_Token t;
138  int i,j;
139  // temporary storage
140  struct arc_t{
141  int start;
142  int end;
143  float logprob;
144  EST_String word;
145  };
146 
147  arc_t *temp_arcs = NULL;
148  int arcindex=0;
149  int nodeindex=0;
150 
151  // nodes can have labels too - but this is not yet supported
152 
153 
154 
155  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
156  {
157  cerr << "Can't open lattice input file " << filename << endl;
158  return false;
159  }
160 
161  // read file into a arrays, make alphabet, then make lattice
162 
163  // find 'size' line
164 
165  int numnodes=0;
166  int numarcs=0;
167 
168  int narcs=-1;
169  int nnodes=-1;
170  int nodenum=-1;
171  int arcnum=-1;
172  int startnode=-1;
173  int endnode=-1;
174  float logprob = 0.0;
175  EST_String word="";
176 
177  while(!ts.eof())
178  {
179 
180  str = ts.get().string();
181 
182  if(!str.contains("="))
183  continue;
184 
185  EST_String left=str.before("=");
186  EST_String right=str.after("=");
187 
188  if(left == "N")
189  nnodes=atoi(right);
190  else if(left == "L")
191  narcs=atoi(right);
192  else if(left == "I")
193  nodenum=atoi(right);
194  else if(left == "J")
195  arcnum=atoi(right);
196  else if(left == "S")
197  startnode=atoi(right);
198  else if(left == "E")
199  endnode=atoi(right);
200  else if(left == "l")
201  logprob=atof(right);
202  else if(left == "W")
203  word=right;
204 
205 
206  if(ts.eoln()){
207 
208  // do something
209 
210  if( (narcs>0) && (nnodes>0) ){
211  // it's the size line
212  if(temp_arcs != NULL){
213  cerr << "Error in lattice file : 2 size lines found"
214  << " : line " << ts.linenum() << endl;
215  ts.close();
216  return false;
217  }else{
218  numarcs=narcs;
219  numnodes=nnodes;
220  temp_arcs = new arc_t[numarcs];
221  cerr << "size : " << numnodes << " " << numarcs << endl;
222  }
223 
224  }else if(nodenum>=0){
225 
226  if(arcnum>0){
227  cerr << "Error in lattice file at line "
228  << ts.linenum() << endl;
229  ts.close();
230  return false;
231  }
232 
233  if(nodenum>=numnodes){
234  cerr << "Error in lattice file at line "
235  << ts.linenum()
236  << " : node index (" << nodenum << ") out of range"
237  << endl;
238  ts.close();
239  return false;
240  }
241 
242  nodeindex++;
243 
244  }else if(arcnum>=0){
245  if(nodenum>0){
246  cerr << "Error in lattice file at line "
247  << ts.linenum() << endl;
248  ts.close();
249  return false;
250  }
251 
252  if(arcnum>=numarcs){
253  cerr << "Error in lattice file at line "
254  << ts.linenum()
255  << " : arc index (" << arcnum << ") out of range"
256  << endl;
257  ts.close();
258  return false;
259  }
260 
261  if((startnode<0) || (startnode>=numnodes)){
262  cerr << "Error in lattice file at line " << ts.linenum()
263  << endl
264  << " arc starts at out of range node "
265  << startnode << endl;
266  return false;
267  }
268 
269  if((endnode<0) || (endnode>=numnodes)){
270  cerr << "Error in lattice file at line " << ts.linenum()
271  << endl
272  << " arc ends at out of range node "
273  << endnode << endl;
274  return false;
275  }
276 
277  // make arc
278  temp_arcs[arcindex].start = startnode;
279  temp_arcs[arcindex].end = endnode;
280  temp_arcs[arcindex].logprob = logprob;
281  temp_arcs[arcindex].word = word;
282  arcindex++;
283 
284  }
285 
286  narcs=-1;
287  nnodes=-1;
288  nodenum=-1;
289  arcnum=-1;
290  startnode=-1;
291  endnode=-1;
292  logprob=-1;
293  word="";
294 
295  }
296 
297  }
298 
299  if(arcindex != numarcs){
300  cerr << "Error in lattice file at line "
301  << "found " << arcindex << " arcs, but expected "
302  << numarcs << endl;
303  return false;
304  }
305 
306  if(nodeindex != numnodes){
307  cerr << "Error in lattice file at line "
308  << "found " << nodeindex << " nodes, but expected "
309  << numnodes << endl;
310  return false;
311  }
312 
313 
314  // make nmap
315  EST_StringTrie seen_before;
316  EST_StrList list_nmap;
317 
318  for(i=0;i<numarcs;i++){
319 
320  if(seen_before.lookup(temp_arcs[i].word) != (void *)(1)){
321  seen_before.add(temp_arcs[i].word,(void *)(1));
322  list_nmap.append(temp_arcs[i].word);
323  }
324  }
325  qsort(list_nmap);
326 
327  //cerr << "here is the list nmap" << list_nmap << endl;
328 
329  i=0;
330  EST_Litem *l_ptr;
331  bool flag;
332  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
333  i++;
334 
335  // transfer to array
336  lattice.nmap.resize(i);
337  i=0;
338  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
339  lattice.nmap[i++] = list_nmap(l_ptr);
340 
341  list_nmap.clear();
342  cerr << "Built nmap with " << i << " entries" << endl;
343 
344 
345  // make qmap
346  // should be a separate fn
347 
348  EST_TList<float> list_qmap;
349 
350  float error_margin = 1.0e-02; // temporary hack
351 
352  for(i=0;i<numarcs;i++){
353 
354  flag = false;
355  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
356  if(fabs(list_qmap(l_ptr) - temp_arcs[i].logprob) <= error_margin){
357  flag = true;
358  break;
359  }
360 
361  if(!flag)
362  list_qmap.append(temp_arcs[i].logprob);
363 
364  }
365 
366  // special zero (within error_margin) entry, if not already there
367  flag = false;
368  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
369  if(fabs(list_qmap(l_ptr)) <= error_margin){
370  flag = true;
371  break;
372  }
373 
374  if(!flag)
375  list_qmap.append(0);
376 
377  qsort(list_qmap);
378 
379  i=0;
380  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
381  i++;
382 
383  // transfer to array
384  lattice.qmap.resize(i);
385  i=0;
386  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
387  lattice.qmap[i++] = list_qmap(l_ptr);
388 
389  list_qmap.clear();
390  cerr << "Built qmap with " << i << " entries" << endl;
391 
392 
393  // make alphabet
394 
395  // temporary list
396  bool **used; // index nmap,qmap
397  int nl = lattice.nmap.n();
398  int ql = lattice.qmap.n();
399  used = new bool*[nl];
400  for(i=0;i<nl;i++)
401  used[i] = new bool[ql];
402 
403  for(i=0;i<nl;i++)
404  for(j=0;j<ql;j++)
405  used[i][j] = false;
406 
407  // get all combinations of word and log probability actually used
408  for(i=0;i<numarcs;i++){
409 
410  //cerr << "arc " << i << " " << temp_arcs[i].logprob
411  //<< " " << temp_arcs[i].word << endl;
412 
413  used[lattice.nmap_name_to_index(temp_arcs[i].word)][lattice.qmap_value_to_index(temp_arcs[i].logprob)] = true;
414  }
415 
416  int count = 0;
417  for(i=0;i<nl;i++)
418  for(j=0;j<ql;j++)
419  if(used[i][j])
420  count++;
421 
422  lattice.alphabet.resize(count);
423  count=0;
424  Lattice::symbol_t *sym;
425  // ordered this way so already sorted, first by q then by n
426  for(j=0;j<ql;j++)
427  for(i=0;i<nl;i++)
428  if(used[i][j]){
429  sym = new Lattice::symbol_t;
430  sym->nmap_index=i;
431  sym->qmap_index=j;
432  lattice.alphabet[count++] = *sym;
433 
434  }
435 
436  cerr << "Alphabet has " << count << " symbols " << endl;
437 
438  // make lattice itself
439 
440  // nodes
441  for(i=0;i<numnodes;i++){
442  Lattice::Node *new_node;
443  new_node = new Lattice::Node;
444  lattice.nodes.append(new_node);
445 
446  }
447 
448  // arcs
449  EST_Litem *n_ptr;
450  for(j=0;j<numarcs;j++){
451 
452  // find from and to nodes by counting down node list
453 
454  // from node
455 
456  for (n_ptr =lattice. nodes.head(),count=0;
457  count<temp_arcs[j].start;
458  n_ptr = n_ptr->next(),count++){
459 
460  if(n_ptr == NULL){
461  cerr << "Couldn't find 'from' node ";
462  return false;
463  }
464  }
465  Lattice::Node *from_node = lattice.nodes(n_ptr);
466 
467  // double check
468  if(from_node == NULL){
469  cerr << "Couldn't find from node, index "
470  << temp_arcs[j].start << endl;
471  return false;
472  }
473 
474 
475  for (n_ptr = lattice.nodes.head(),count=0;
476  count<temp_arcs[j].end;
477  n_ptr = n_ptr->next(),count++){
478 
479  if(n_ptr == NULL){
480  cerr << "Couldn't find 'to' node ";
481  return false;
482  }
483  }
484  Lattice::Node *to_node = lattice.nodes(n_ptr);
485 
486  // double check
487  if(to_node == NULL){
488  cerr << "Couldn't find to node, index "
489  << temp_arcs[j].end << endl;
490  return false;
491  }
492 
493 
494  int word_index = lattice.nmap_name_to_index(temp_arcs[j].word);
495 
496  // get arc symbol
497  int symbol = lattice.alphabet_index_lookup(word_index,
498  lattice.qmap_value_to_index(temp_arcs[j].logprob));
499  if(symbol < 0){
500  cerr << "Couldn't lookup symbol in alphabet !" << endl;
501  return false;
502  }
503 
504  Lattice::Arc *new_arc = new Lattice::Arc;
505  new_arc->label = symbol;
506  new_arc->to = to_node;
507 
508  if(to_node->name.head() == NULL)
509  to_node->name.append(word_index); // only name of first arc in .. !
510 
511  from_node->arcs_out.append(new_arc);
512 
513  }
514 
515  // find final nodes
516  for (n_ptr = lattice.nodes.head(),count=0;
517  n_ptr!= NULL;
518  n_ptr = n_ptr->next()){
519 
520  if(lattice.nodes(n_ptr)->arcs_out.head() == NULL){
521  lattice.final_nodes.append(lattice.nodes(n_ptr));
522  count++;
523  }
524  }
525 
526  cerr << "found " << count << " final nodes" << endl;
527 
528 
529 
530  cerr << "Lattice loaded !" << endl;
531 
532  delete [] used;
533  delete [] temp_arcs;
534 
535  return true;
536 }
537