Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
relation_io.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor updated by awb */
34 /* Date : Feb 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class file i/o, label files */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <fstream>
42 #include "EST_unix.h"
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "EST_string_aux.h"
46 #include "EST_cutils.h"
47 #include "EST_TList.h"
48 #include "EST_Option.h"
49 #include "relation_io.h"
50 
51 #define DEF_SAMPLE_RATE 16000
52 #define HTK_UNITS_PER_SECOND 10000000
53 
54 static EST_Regex RXleadingwhitespace("^[ \t\n\r][ \t\n\r]*.*$");
55 
56 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
57  int sample);
58 
59 EST_read_status load_esps_label(EST_TokenStream &ts,EST_Relation &rel)
60 {
61  ts.set_SingleCharSymbols(";");
62  ts.set_quotes('"','\\');
63  EST_String key, val;
64 
65  // Skip the header
66  while (!ts.eof())
67  {
68  key = ts.get().string();
69  if (key == "#")
70  break;
71 
72  val = ts.get_upto_eoln().string();
73  // delete leading whitespace
74  if (val.matches(RXleadingwhitespace))
75  val = val.after(RXwhite);
76  rel.f.set(key, val);
77  }
78 
79  if (ts.peek() == "") return format_ok;
80 
81  while (!ts.eof())
82  {
83  EST_Item *si = rel.append();
84  EST_String name;
85 
86  si->set("end",(float)atof(ts.get().string()));
87  ts.get(); // skip the color;
88 
89  for (name = ""; (!ts.eoln()) && (ts.peek() != ";"); )
90  {
91  EST_Token &t = ts.get();
92  if (name.length() > 0) // preserve internal whitespace
93  name += t.whitespace();
94  name += t.string();
95  }
96  si->set_name(name);
97 
98  if (ts.peek().string() == ";") // absorb separator
99  {
100  ts.get();
101  si->features().load(ts);
102  }
103  }
104  return format_ok;
105 }
106 
107 EST_write_status save_esps_label(const EST_String &filename,
108  const EST_Relation &s,
109  bool evaluate_ff)
110 {
111  ostream *outf;
112  if (filename == "-")
113  outf = &cout;
114  else
115  outf = new ofstream(filename);
116 
117  if (!(*outf))
118  {
119  cerr << "save_esps_label: can't open label output file \"" <<
120  filename << "\"" << endl;
121  return write_fail;
122  }
123 
124  EST_write_status st=save_esps_label(outf, s, evaluate_ff);
125 
126  if (outf != &cout)
127  delete outf;
128 
129  return st;
130 }
131 
132 EST_write_status save_esps_label(ostream *outf,
133  const EST_Relation &s,
134  bool evaluate_ff)
135 {
136  EST_Item *ptr;
137 
138  *outf << "separator ;\n";
139  if (!s.f.present("nfields"))
140  *outf << "nfields 1\n";
141 
143  for (p.begin(s.f); p; ++p)
144  *outf << p->k << " " << p->v << endl;
145 
146  *outf << "#\n";
147 /* if (f("timing_style") == "event")
148  *outf << "timing_style event\n";
149  else if (f("timing_style") == "unit")
150  *outf << "timing_style unit\n";
151 */
152 
153  for (ptr = s.head(); ptr != 0; ptr = ptr->next())
154  {
155  *outf << "\t";
156  outf->precision(5);
157  outf->setf(ios::fixed, ios::floatfield);
158  outf->width(8);
159  // outf->fill('0');
160  if (s.f("timing_style","0") == "event")
161  *outf << ptr->F("time",0);
162  else
163  *outf << ptr->F("end",0);
164 
165  *outf << " 26 \t" << ptr->S("name","0");
166 
167  EST_Features f2;
168  f2 = ptr->features();
169  f2.remove("name");
170  f2.remove("end");
171  if (evaluate_ff)
172  evaluate(ptr,f2);
173 
174  if (f2.length() > 0)
175  {
176  *outf << " ; ";
177  f2.save(*outf);
178  }
179  *outf << endl;
180  }
181 
182  return write_ok;
183 }
184 
185 EST_read_status load_ogi_label(EST_TokenStream &ts, EST_Relation &s)
186 {
187  // This function reads OGI style label files. The start, end
188  // time and names of the labels are mandatory.
189  EST_String key, val;
190  float sr;
191  int isr;
192 
193  // set up the character constant values for this stream
194  ts.set_SingleCharSymbols(";");
195 
196  // Skip over header
197 
198  while(!ts.eof())
199  {
200  if ((ts.peek().col() == 0) && (ts.peek() == "END"))
201  {
202  if (ts.peek() == "END")
203  { // read rest of header
204  ts.get();
205  ts.get();
206  ts.get();
207  }
208  break;
209  }
210  key = ts.get().string();
211  val = ts.get().string();
212  }
213 
214  sr = 1000.0 / atof(val);
215  isr = (int)sr;
216 
217  if (ts.eof())
218  {
219  cerr << "Error: couldn't find header in label file "
220  << ts.filename() << endl;
221  return wrong_format;
222  }
223 
224  if (read_label_portion(ts, s, isr) == misc_read_error)
225  {
226  cerr << "error: in label file " << ts.filename() << " at line " <<
227  ts.linenum() << endl;
228  return misc_read_error;
229  }
230  return format_ok;
231 }
232 
233 EST_read_status load_words_label(EST_TokenStream &ts, EST_Relation &s)
234 {
235  // This function reads label files in the form of simple word strings
236  // with no timing information.
237  EST_Item *item;
238 
239  while (!ts.eof())
240  {
241  item = s.append();
242  item->set("name",(EST_String)ts.get());
243  item->set("end",0.0);
244  }
245 
246  return format_ok;
247 }
248 
// Convert a time written as a count of `sample` units per second into
// seconds.
//
// Some label formats write times in 100 nanosecond units, which quickly
// exceed what an int can hold, so strings of 15 characters or more are
// converted digit by digit with each digit scaled by `sample` as it is
// accumulated.  Shorter strings simply go through atof().
//
//   s       numeric string; for the long form leading blanks are skipped
//           and conversion stops at the first non-digit
//   sample  number of units per second
//
// Returns the time in seconds.
static float convert_long_num_string_to_time(const char *s,int sample)
{
    if (strlen(s) < 15)
        return atof(s)/sample;

    const char *cp = s;
    double seconds = 0;
    double digit;

    // skip leading whitespace
    while ((*cp == ' ') || (*cp == '\n') || (*cp == '\r') || (*cp == '\t'))
        cp++;

    // accumulate decimal digits, dividing each one as it is added
    while ((*cp >= '0') && (*cp <= '9'))
    {
        seconds = seconds*10;
        digit = *cp - '0';
        seconds += (digit/(double)sample);
        cp++;
    }

    return seconds;
}
276 
277 EST_read_status read_label_portion(EST_TokenStream &ts, EST_Relation &s,
278  int sample)
279 {
280  EST_Item *item;
281  float hstart, hend;
282  EST_String str;
283 
284  while(!ts.eof())
285  {
286  str = ts.get().string();
287  if (str == ".")
288  return format_ok;
289 
290  item = s.append();
291 
292  hstart = convert_long_num_string_to_time(str,sample);
293  str = ts.get().string();
294  hend = convert_long_num_string_to_time(str,sample);
295 
296  item->set("end",hend); // time
297  item->set("name",ts.get().string()); // name
298 
299  if (!ts.eoln())
300  item->set("rest_lab",ts.get_upto_eoln().string());
301  }
302 
303  return format_ok;
304 }
305 
306 EST_read_status load_sample_label(EST_TokenStream &ts,
307  EST_Relation &s, int sample)
308 {
309 
310  if (sample == 0) // maybe this should be an error
311  sample = DEF_SAMPLE_RATE;
312 
313  // set up the character constant values for this stream
314  ts.set_SingleCharSymbols(";");
315 
316  s.clear();
317  if (read_label_portion(ts, s, sample) == misc_read_error)
318  {
319  cerr << "error: in label file " << ts.filename() << " at line " <<
320  ts.linenum() << endl;
321  return misc_read_error;
322  }
323  return format_ok;
324 }
325 
326 EST_write_status save_htk_label(const EST_String &filename,
327  const EST_Relation &a)
328 {
329  ostream *outf;
330  if (filename == "-")
331  outf = &cout;
332  else
333  outf = new ofstream(filename);
334 
335  if (!(*outf))
336  {
337  cerr << "save_htk_label: can't open label output file \"" <<
338  filename << "\"" << endl;
339  return write_fail;
340  }
341 
342  EST_write_status s = save_htk_label(outf, a);
343 
344 
345  if (outf != &cout)
346  delete outf;
347 
348  return s;
349 }
350 
351 EST_write_status save_htk_label(ostream *outf,
352  const EST_Relation &a)
353 {
354  EST_Item *ptr;
355  float end,start;
356 
357  outf->precision(6);
358 
359  start = end = 0;
360  for (ptr = a.head(); ptr != 0; ptr = ptr->next())
361  {
362  outf->width(15);
363  cout.setf(ios::left,ios::adjustfield);
364  *outf << (int)(start * HTK_UNITS_PER_SECOND);
365  outf->width(15);
366  end = ptr->F("end",0.0);
367  *outf << (int)(end * HTK_UNITS_PER_SECOND);
368  *outf << " " << ptr->name() << endl;
369  start = end;
370  }
371 
372  return write_ok;
373 }
374 
#if 0
// NOTE: everything down to the matching #endif is compiled out.  It is
// written against EST_Stream_Item and is kept for reference only.

// Write item names and durations (dur() * 1000.0) in two 8 character
// columns; the first line is tagged "(0,140)" and the last "(99,80)".
EST_write_status save_label_spn(const EST_String &filename,
                                const EST_Relation &a)
{
    EST_Stream_Item *ptr;
    ostream *outf;

    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_spn: can't open label output file \""
             << filename << "\"" << endl;
        return write_fail;
    }

    // first item
    ptr = a.head();
    outf->precision(3);
    outf->setf(ios::left, ios::adjustfield);
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::fixed, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (0,140)" << endl;

    // every item that has a successor
    for (; next(ptr) != 0; ptr = ptr->next())
    {
        outf->precision(3);
        outf->setf(ios::left, ios::adjustfield);
        outf->width(8);
        *outf << ptr->name();
        outf->setf(ios::fixed, ios::floatfield);
        outf->width(8);
        *outf << (ptr->dur() * 1000.0) << endl;
    }

    // last item
    outf->width(8);
    *outf << ptr->name();
    outf->setf(ios::fixed, ios::floatfield);
    outf->width(8);
    *outf << (ptr->dur() * 1000.0) << "\t (99,80)" << endl;

    if (outf != &cout)
        delete outf;

    return write_ok;
}

// Write only the item names, separated by newlines when `features` is
// neither empty nor "OneLine", and by single spaces otherwise.
EST_write_status save_label_names(const EST_String &filename,
                                  const EST_Relation &a,
                                  const EST_String &features)
{
    EST_Stream_Item *ptr;
    ostream *outf;

    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_label_name: can't open label output file \""
             << filename << "\"" << endl;
        return misc_write_error;
    }

    for (ptr = a.head(); next(ptr) != 0; ptr = ptr->next())
    {
        *outf << ptr->name();
        if ((features != "") && (features != "OneLine"))
            *outf << endl;
        else
            *outf << " ";
    }

    // final name always ends the line
    *outf << ptr->name() << endl;

    if (outf != &cout)
        delete outf;
    return write_ok;
}
#endif
462 
463 EST_write_status save_RelationList(const EST_String &filename,
464  const EST_RelationList &plist,
465  int time, int path)
466 {
467  EST_Litem *p;
468  EST_Item *ptr;
469  EST_String outname;
470  float start,end;
471 
472  ostream *outf;
473  if (filename == "-")
474  outf = &cout;
475  else
476  outf = new ofstream(filename);
477 
478  if (!(*outf))
479  {
480  cerr << "save_StreamList: can't open MLF output file \""
481  << filename << "\"\n";
482  return write_fail;
483  }
484 
485  *outf << "#!MLF!#\n"; // MLF header/identifier
486  outf->precision(6);
487 
488  start = end = 0;
489  for (p = plist.head(); p != 0; p = p->next())
490  {
491  outname = path ? plist(p).name() : basename(plist(p).name());
492  *outf << "\"*/" << outname<<"\"\n";
493  for (ptr = plist(p).head(); ptr != 0; ptr = ptr->next())
494  {
495  if (time)
496  {
497  outf->width(15);
498  cout.setf(ios::left,ios::adjustfield);
499  *outf << (int)(start * HTK_UNITS_PER_SECOND);
500  outf->width(15);
501  end = ptr->F("end",0.0);
502  *outf << (int)(end * HTK_UNITS_PER_SECOND) << " ";
503  start = end;
504  }
505  *outf << ptr->S("name","0") << endl;
506  }
507  *outf << ".\n";
508  }
509 
510  if (outf != &cout)
511  delete outf;
512  return write_ok;
513 }
514 
515 EST_write_status save_WordList(const EST_String &filename,
516  const EST_RelationList &plist,
517  int style)
518 {
519  EST_Litem *p;
520  EST_Item *ptr;
521 
522  ostream *outf;
523  if (filename == "-")
524  outf = &cout;
525  else
526  outf = new ofstream(filename);
527 
528  if (!(*outf))
529  {
530  cerr << "save:WordList: can't open WordList output file \""
531  << filename << "\"\n";
532  return write_fail;
533  }
534 
535  for (p = plist.head(); p != 0; p = p->next())
536  {
537  for (ptr = plist(p).head(); ptr->next() != 0; ptr = ptr->next())
538  {
539  *outf << ptr->name();
540  if (style == 0)
541  *outf << endl;
542  else
543  *outf << " ";
544  }
545  if (ptr != 0)
546  *outf << ptr->name() << endl;
547  }
548 
549  if (outf != &cout)
550  delete outf;
551  return write_ok;
552 }
553 
554 EST_write_status save_ind_RelationList(const EST_String &filename,
555  const EST_RelationList &plist,
556  const EST_String &features,
557  int path)
558 {
559  EST_Litem *p;
560  EST_String outname;
561  (void) filename;
562  (void) features;
563 
564  for (p = plist.head(); p != 0; p = p->next())
565  {
566  outname = path ? plist(p).name() : basename(plist(p).name());
567  if (plist(p).save(outname,false) != write_ok)
568  return misc_write_error;
569  }
570 
571  return write_ok;
572 }
573 
574 EST_read_status load_RelationList(const EST_String &filename,
575  EST_RelationList &plist)
576 {
577  EST_TokenStream ts;
578  EST_String fns, name;
579 
580  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
581  {
582  cerr << "Can't open label input file " << filename << endl;
583  return misc_read_error;
584  }
585  // set up the character constant values for this stream
586  ts.set_SingleCharSymbols(";");
587 
588  // Skip over header
589  if (ts.get().string() != "#!MLF!#")
590  {
591  cerr << "Not MLF file\n";
592  return wrong_format;
593  }
594 
595  while(!ts.eof())
596  {
597  // put filename in as stream name. The filename is usually surrounded
598  // by quotes, so remove these.
599  fns = ts.get().string();
600  strip_quotes(fns);
601  EST_Relation s(fns);
602  s.f.set("name", fns); // simonk
603  plist.append(s);
604 
605  if (read_label_portion(ts, plist.last(), 10000000) == misc_read_error)
606  {
607  cerr << "error: in reading MLF file\n";
608  cerr << "section for file " << fns <<
609  " at line " << ts.linenum() << " is badly formatted\n";
610 
611  return misc_read_error;
612  }
613  }
614 
615  return format_ok;
616 }
617 
618 static void pad_ends(EST_Relation &s, float length)
619 {
620  // add evenly spaced dummy end values to Relation
621  EST_Item *p;
622  int i;
623 
624  for (i = 0, p = s.head(); p; p = p->next(), ++i)
625  p->set("end",(length * float(i)/float(s.length())));
626 }
627 
628 EST_read_status read_RelationList(EST_RelationList &plist,
629  EST_StrList &files, EST_Option &al)
630 {
631  EST_Litem *p, *plp;
632 
633  if (al.val("-itype", 0) == "mlf")
634  {
635  if (load_RelationList(files.first(), plist) != format_ok)
636  exit (-1);
637  }
638  else
639  for (p = files.head(); p; p = p->next())
640  {
641  EST_Relation s(files(p));
642  plist.append(s);
643  plp = plist.tail();
644  if (al.present("-itype"))
645  {
646  if (plist(plp).load(files(p), al.val("-itype")) != format_ok)
647  exit (-1);
648  }
649  else if (plist(plp).load(files(p)) != format_ok)
650  exit (-1);
651  if ((al.val("-itype", 0) == "words") && (al.present("-length")))
652  pad_ends(s, al.fval("-length"));
653 
654  }
655 
656  return format_ok;
657 }