Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_UtteranceFile.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* -------------------------------------------------------------------- */
36  /* Functions to load and save utterances in various formats. */
37  /* */
38  /*************************************************************************/
39 
40 #include <cstdlib>
41 #include <cstdio>
42 #include <iostream>
43 #include <fstream>
44 #include "EST_string_aux.h"
45 #include "EST_FileType.h"
46 #include "EST_Token.h"
47 #include "ling_class/EST_Utterance.h"
48 #include "EST_UtteranceFile.h"
49 
50 static EST_read_status load_all_contents(EST_TokenStream &ts,
51 // EST_THash<int,EST_Val> &sitems,
53  int &max_id);
54 static EST_read_status load_relations(EST_TokenStream &ts,
55  EST_Utterance &utt,
57 // const EST_THash<int,EST_Val> &sitems
58  );
59 // static EST_write_status save_est_ascii(ostream &outf,const EST_Utterance &utt);
60 static EST_write_status utt_save_all_contents(ostream &outf,
61  const EST_Utterance &utt,
62  EST_TKVL<void *,int> &sinames);
63 static EST_write_status utt_save_all_contents(ostream &outf,
64  EST_Item *n,
65  EST_TKVL<void *,int> &sinames,
66  int &si_count);
67 static EST_write_status utt_save_ling_content(ostream &outf,
68  EST_Item *si,
69  EST_TKVL<void *,int> &sinames,
70  int &si_count);
71 
72 static void node_tidy_up(int &k, EST_Item_Content *node)
73 {
74  // Called to delete the nodes in the hash table when a load
75  (void)k;
76 
77  if (node->unref_relation("__READ__"))
78  delete node;
79 }
80 
81 EST_read_status EST_UtteranceFile::load_est_ascii(EST_TokenStream &ts,
82  EST_Utterance &u,
83  int &max_id)
84 {
85  EST_Option hinfo;
86  bool ascii;
87  EST_EstFileType t;
88  EST_read_status r;
89  // EST_THash<int,EST_Val> sitems(100);
90 
92 
93  // set up the character constant values for this stream
94  ts.set_SingleCharSymbols(";()");
95  ts.set_quotes('"','\\');
96 
97  if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
98  return r;
99  if (t != est_file_utterance)
100  return misc_read_error;
101  if (hinfo.ival("version") != 2)
102  {
103  if (hinfo.ival("version") == 3)
104  EST_warning("Loading est utterance format version 3, ladders will not be understood");
105  else
106  {
107  EST_error("utt_load: %s wrong version of utterance format expected 2 (or 3) but found %d",
108  (const char *)ts.pos_description(), hinfo.ival("version"));
109  }
110  }
111 
112  // Utterance features
113  if (ts.get() != "Features")
114  {
115  cerr << "utt_load: " << ts.pos_description() <<
116  " missing utterance features section" << endl;
117  return misc_read_error;
118  }
119  else
120  u.f.load(ts);
121  // items
122  if (ts.get() != "Stream_Items")
123  {
124  cerr << "utt_load: " << ts.pos_description() <<
125  " missing Items section" << endl;
126  return misc_read_error;
127  }
128  max_id = 0;
129  r = load_all_contents(ts, sitems, max_id);
130 
131  // Only exist in older form utterances so soon wont be necessary
132  if (ts.peek() == "Streams")
133  {
134  cerr << "utt.load: streams found in utterance file, " <<
135  "no longer supported" << endl;
136  return misc_read_error;
137  }
138 
139  // Relations
140  if ((r == format_ok) && (ts.get() != "Relations"))
141  {
142  cerr << "utt_load: " << ts.pos_description() <<
143  " missing Relations section" << endl;
144  return misc_read_error;
145  }
146 
147  r = load_relations(ts, u, sitems);
148 
149  if ((r == format_ok) && (ts.get() != "End_of_Utterance"))
150  {
151  cerr << "utt_load: " << ts.pos_description() <<
152  " End_of_Utterance expected but not found" << endl;
153  return misc_read_error;
154  }
155 
156  // if (r != format_ok)
157  // {
158  // This works because even if some of these si's have been
159  // linked to nodes they will be unlink when the si is destroyed
160  for(int ni=0; ni < sitems.length(); ni++)
161  {
162  EST_Item_Content *c = sitems[ni];
163  if (c != NULL)
164  node_tidy_up(ni, c);
165  }
166  // }
167 
168  return r;
169 
170 }
171 
172 static EST_read_status load_all_contents(EST_TokenStream &ts,
173 // EST_THash<int,EST_Val> &sitems,
175  int &max_id)
176 {
177  // Load items into table with names for later reference
178  // by relations
179  EST_String Sid;
180  bool ok;
181  int id,idval;
182 
183  while (ts.peek() != "End_of_Stream_Items")
184  {
186 
187  si->relations.add_item("__READ__", est_val((EST_Item *)NULL), 1);
188 
189  id = 0;
190 
191  Sid = ts.get().string();
192 
193  id = Sid.Int(ok);
194  if (!ok)
195  {
196  cerr << "utt_load: " << ts.pos_description() <<
197  " Item name not a number: " << Sid << endl;
198  return misc_read_error;
199  }
200  if (id >= sitems.length())
201  {
202  sitems.resize(id*2, 1);
203  }
204  sitems[id] = si;
205  // sitems.add_item(id,est_val(si));
206  if (si->f.load(ts) != format_ok)
207  return misc_read_error;
208  idval = si->f.I("id",0);
209  if (idval > max_id)
210  max_id = idval;
211  if (ts.eof())
212  return misc_read_error; // just in case this happens
213  }
214 
215  ts.get(); // skip "End_of_Stream_Items"
216 
217  return format_ok;
218 }
219 
220 static EST_read_status load_relations(EST_TokenStream &ts,
221  EST_Utterance &utt,
223 // const EST_THash<int,EST_Val> &sitems
224  )
225 {
226  // Load relations
227 
228  while (ts.peek() != "End_of_Relations")
229  {
230  // can't use create relation as we don't know its name until
231  // after its loaded
232  EST_Relation *r = new EST_Relation;
233 
234  if (r->load(ts,sitems) != format_ok)
235  return misc_read_error;
236 
237  r->set_utt(&utt);
238  utt.relations.set_val(r->name(),est_val(r));
239 
240  if (ts.eof())
241  return misc_read_error;
242  }
243 
244  ts.get(); // Skip "End_of_Relations"
245 
246  return format_ok;
247 }
248 
249 
250 EST_write_status EST_UtteranceFile::save_est_ascii(ostream &outf,const EST_Utterance &utt)
251 {
252  EST_write_status v = write_ok;
253 
254  outf.precision(8);
255  outf.setf(ios::fixed, ios::floatfield);
256  outf.width(8);
257 
258  outf << "EST_File utterance\n"; // EST header identifier.
259  outf << "DataType ascii\n";
260  outf << "version 2\n";
261  outf << "EST_Header_End\n"; // EST end of header identifier.
262 
263  // Utterance features
264  outf << "Features ";
265  utt.f.save(outf);
266  outf << endl;
267 
268  outf << "Stream_Items\n";
269  EST_TKVL<void *,int> sinames;
270  v = utt_save_all_contents(outf,utt,sinames);
271  if (v == write_fail) return v;
272  outf << "End_of_Stream_Items\n";
273 
274  // Relations
275  outf << "Relations\n";
277  for (p.begin(utt.relations); p; p++)
278  {
279  v = relation(p->v)->save(outf,sinames);
280  if (v == write_fail) return v;
281  }
282  outf << "End_of_Relations\n";
283 
284  outf << "End_of_Utterance\n";
285  return write_ok;
286 }
287 
288 static EST_write_status utt_save_all_contents(ostream &outf,
289  const EST_Utterance &utt,
290  EST_TKVL<void *,int> &sinames)
291 {
292  // Write out all stream items in the utterance, as they may appear in
293  // various places in an utterance keep a record of which ones
294  // have been printed and related them to names for reference by
295  // the Relations (and older Stream architecture).
296  int si_count = 1;
297  EST_write_status v = write_ok;
298 
299  // Find the stream items in the relations
301  for (p.begin(utt.relations); p; p++)
302  {
303  v = utt_save_all_contents(outf,relation(p->v)->head(),
304  sinames,si_count);
305  if (v == write_fail) return v;
306  }
307 
308  return v;
309 }
310 
311 static EST_write_status utt_save_all_contents(ostream &outf,
312  EST_Item *n,
313  EST_TKVL<void *,int> &sinames,
314  int &si_count)
315 {
316  if (n == 0)
317  return write_ok;
318  else
319  {
320  utt_save_ling_content(outf,n,sinames,si_count);
321  // As we have more complex structures this will need to
322  // be updated (i.e. we'll need a marking method for nodes)
323  utt_save_all_contents(outf,n->next(),sinames,si_count);
324  utt_save_all_contents(outf,n->down(),sinames,si_count);
325  }
326  return write_ok;
327 }
328 
329 static EST_write_status utt_save_ling_content(ostream &outf,
330  EST_Item *si,
331  EST_TKVL<void *,int> &sinames,
332  int &si_count)
333 {
334  // Save item and features if not already saved
335 
336  if ((si != 0) && (!sinames.present(si->contents())))
337  {
338  sinames.add_item(si->contents(),si_count);
339  outf << si_count << " ";
340  si->features().save(outf);
341  outf << endl;
342  si_count++;
343  }
344  return write_ok;
345 }
346 
347 EST_read_status EST_UtteranceFile::load_xlabel(EST_TokenStream &ts,
348  EST_Utterance &u,
349  int &max_id)
350 {
351  (void)max_id;
352  EST_read_status status = read_ok;
353 
354  u.clear();
355 
356  EST_Relation *rel = u.create_relation("labels");
357 
358  status = rel->load("", ts, "esps");
359 
360  EST_Item *i = rel->head();
361  float t=0.0;
362 
363  while (i != NULL)
364  {
365  i->set("start", t);
366  t = i->F("end");
367  i = i->next();
368  }
369 
370  return status;
371 }
372 
373 EST_write_status EST_UtteranceFile::save_xlabel(ostream &outf,
374  const EST_Utterance &utt)
375 {
376  EST_write_status status = write_error;
377 
378  EST_Relation *rel;
379 
381 
382  for (p.begin(utt.relations); p; p++)
383  {
384  rel = ::relation(p->v);
385 
386  EST_Item * hd = rel->head();
387 
388 
389  while (hd)
390  {
391  if (hd->up() || hd->down())
392  break;
393  hd=hd->next();
394  }
395 
396  // didn't find anything => this is linear
397  if(!hd)
398  return rel->save(outf, "esps", 0);
399  }
400 
401  // Found no linear relations
402 
403  return status;
404 }
405 
406 #if defined(INCLUDE_XML_FORMATS)
407 
408 #include "genxml.h"
409 #include "apml.h"
410 
411 // APML support
412 EST_read_status EST_UtteranceFile::load_apml(EST_TokenStream &ts,
413  EST_Utterance &u,
414  int &max_id)
415 {
416  FILE *stream;
417 
418  if ((stream=ts.filedescriptor())==NULL)
419  return read_error;
420 
421  long pos=ftell(stream);
422 
423  {
424  char buf[80];
425 
426  fgets(buf, 80, stream);
427 
428  if (strncmp(buf, "<?xml", 5) != 0)
429  return read_format_error;
430 
431  fgets(buf, 80, stream);
432 
433  if (strncmp(buf, "<!DOCTYPE apml", 14) != 0)
434  return read_format_error;
435  }
436 
437  fseek(stream, pos, 0);
438 
439  EST_read_status stat = apml_read(stream, ts.filename(),u, max_id);
440 
441  if (stat != read_ok)
442  fseek(stream, pos, 0);
443 
444  return stat;
445 }
446 
447 
448 // GenXML support
449 
450 EST_read_status EST_UtteranceFile::load_genxml(EST_TokenStream &ts,
451  EST_Utterance &u,
452  int &max_id)
453 {
454  FILE *stream;
455 
456  if ((stream=ts.filedescriptor())==NULL)
457  return read_error;
458 
459  long pos=ftell(stream);
460 
461  {
462  char buf[80];
463 
464  fgets(buf, 80, stream);
465 
466  if (strncmp(buf, "<?xml", 5) != 0)
467  return read_format_error;
468  }
469 
470  fseek(stream, pos, 0);
471 
472  EST_read_status stat = EST_GenXML::read_xml(stream, ts.filename(),u, max_id);
473 
474  if (stat != read_ok)
475  fseek(stream, pos, 0);
476 
477  return stat;
478 }
479 
480 EST_write_status EST_UtteranceFile::save_genxml(ostream &outf,
481  const EST_Utterance &utt)
482 {
483  EST_write_status status=write_ok;
484 
485  EST_TStringHash<int> features(20);
486 
488 
489  for (p.begin(utt.relations); p; ++p)
490  {
491  EST_Relation *rel = ::relation(p->v);
492 
493  EST_Item * hd = rel->head();
494 
495  while (hd)
496  {
498  for (fp.begin(hd->features()); fp; ++fp)
499  features.add_item(fp->k, 1);
500  hd=hd->next();
501  }
502  }
503 
504  outf << "<?xml version='1.0'?>\n";
505 
506  outf << "<!DOCTYPE utterance PUBLIC '//CSTR EST//DTD cstrutt//EN' 'cstrutt.dtd'\n\t[\n";
507 
509 
510  outf << "\t<!ATTLIST item\n";
511  for (f.begin(features); f; ++f)
512  {
513  if (f->k != "id")
514  {
515  outf << "\t\t" << f->k << "\tCDATA #IMPLIED\n";
516  }
517  }
518 
519  outf << "\t\t>\n";
520 
521  outf << "\t]>\n";
522 
523  outf << "<utterance>\n";
524 
525  outf << "<language name='unknown'/>\n";
526 
527  for (p.begin(utt.relations); p; ++p)
528  {
529  EST_Relation *rel = ::relation(p->v);
530 
531  EST_Item * hd = rel->head();
532 
533 
534  while (hd)
535  {
536  if (hd->up() || hd->down())
537  break;
538  hd=hd->next();
539  }
540 
541  // didn't find anything => this is linear
542  if(!hd)
543  {
544  outf << "<relation name='"<< rel->name()<< "' structure-type='list'>\n";
545 
546  hd = rel->head();
547  while (hd)
548  {
549  outf << " <item\n";
550 
552  for (p.begin(hd->features()); p; ++p)
553  if (p->k != "estContentFeature")
554  outf << " " << p->k << "='" << p->v << "'\n";
555 
556  outf << " />\n";
557 
558  hd=hd->next();
559  }
560 
561  outf << "</relation>\n";
562  }
563  else // for now give an error for non-linear relations
564  status=write_partial;
565  }
566 
567 
568  outf << "</utterance>\n";
569 
570  return status;
571 ;
572 }
573 #endif
574 
575 EST_String EST_UtteranceFile::options_short(void)
576 {
577  EST_String s("");
578 
579  for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
580  {
581  EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
582  if (type != uff_none)
583  {
584  for(int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
585  {
586  const char *nm = EST_UtteranceFile::map.name(type, ni);
587  if (nm==NULL)
588  break;
589 
590  if (s != "")
591  s += ", ";
592 
593  s += nm;
594  }
595  }
596  }
597  return s;
598 }
599 
600 EST_String EST_UtteranceFile::options_supported(void)
601 {
602  EST_String s("Available utterance file formats:\n");
603 
604  for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
605  {
606  EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
607  if (type != uff_none)
608  {
609  const char *d = EST_UtteranceFile::map.info(type).description;
610  for(int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
611  {
612  const char *nm = EST_UtteranceFile::map.name(type, ni);
613  if (nm==NULL)
614  break;
615 
616  s += EST_String::cat(" ", (nm?nm:"NULL"), EST_String(" ")*(12-strlen((nm?nm:"NULL"))), (d?d:"NULL"), "\n");
617  }
618  }
619  }
620  return s;
621 }
622 
623 
624 
625 // note the order here defines the order in which loads are tried.
// Table of supported utterance file formats.  Each entry gives the format
// type, its list of names, and an Info record holding a boolean flag, the
// load function, the save function (NULL where none is provided) and a
// description string (the one read by options_supported).
// NOTE(review): the meaning of the leading TRUE/FALSE flag is not visible
// in this file -- confirm against EST_UtteranceFile::Info.
626 Start_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
627  { uff_none, { NULL },
628  { FALSE, NULL, NULL, "unknown utterance file type"} },
629  { uff_est, { "est", "est_ascii"},
630  { TRUE, EST_UtteranceFile::load_est_ascii, EST_UtteranceFile::save_est_ascii, "Standard EST Utterance File" } },
// The XML formats are only compiled in when INCLUDE_XML_FORMATS is defined.
// NOTE(review): "xml" is listed as a synonym of both uff_apml and
// uff_genxml -- confirm which entry a lookup by that name should return.
631 #if defined(INCLUDE_XML_FORMATS)
632  { uff_apml, { "apml", "xml"},
633  { TRUE, EST_UtteranceFile::load_apml, NULL, "Utterance in APML" } },
634  { uff_genxml, { "genxml", "xml"},
635  { TRUE, EST_UtteranceFile::load_genxml, EST_UtteranceFile::save_genxml, "Utterance in XML, Any DTD" } },
636 #endif
637  { uff_xlabel, { "xlabel"},
638  { TRUE, EST_UtteranceFile::load_xlabel, EST_UtteranceFile::save_xlabel, "Xwaves Label File" } },
// Final uff_none entry; presumably the end-of-table marker -- confirm
// against the TNamedEnum macros.
639  { uff_none, {NULL},
640  { FALSE, NULL, NULL, "unknown utterance file type"} }
641 
642 End_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
643 
644 Declare_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
645 
// Explicit template instantiation for the named-enum table, used only when
// the build defines INSTANTIATE_TEMPLATES.
646 #if defined(INSTANTIATE_TEMPLATES)
647 #include "../base_class/EST_TNamedEnum.cc"
648 Instantiate_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
649 #endif
650 
// Vector-of-EST_Item_Content* support; this is the element type of the
// item table (sitems) used by the est_ascii loader above.
651 Declare_TVector_Base_T(EST_Item_Content *, NULL, NULL, EST_Item_ContentP)
652 
653 #if defined(INSTANTIATE_TEMPLATES)
654 
655 #include "../base_class/EST_TSimpleVector.cc"
656 #include "../base_class/EST_TVector.cc"
657 #include "../base_class/EST_Tvectlist.cc"
658 
659 Instantiate_TVector_T(EST_Item_Content *, EST_Item_ContentP)
660 
661 #endif
662