Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_relation_aux.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor and Simon King */
34 /* Date : June 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Relation class auxiliary routines */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cmath>
43 #include "EST_types.h"
44 #include "ling_class/EST_Relation.h"
45 #include "ling_class/EST_relation_aux.h"
46 #include "EST_string_aux.h"
47 #include "EST_io_aux.h"
48 #include "EST_Option.h"
49 #include "EST_Token.h"
50 
51 static int is_in_class(const EST_String &name, EST_StrList &s);
52 
53 bool dp_match(const EST_Relation &lexical,
54  const EST_Relation &surface,
55  EST_Relation &match,
56  float ins, float del, float sub);
57 
58 
59 float start(EST_Item *n)
60 {
61  return (n->prev() == 0) ? 0.0 : n->prev()->F("end");
62 }
63 
64 float duration(EST_Item *n)
65 {
66  return n->F("end") - start(n);
67 }
68 
69 void quantize(EST_Relation &a, float q)
70 {
71  EST_Item *a_ptr;
72  float end;
73 
74  for (a_ptr = a.head(); a_ptr != 0; a_ptr = a_ptr->next())
75  {
76  end = a_ptr->F("end") / q;
77  end = rint(end);
78  end = end * q;
79  a_ptr->set("end", end);
80  }
81 }
82 
83 // edit labels using a sed file to do the editing
84 
85 int edit_labels(EST_Relation &a, EST_String sedfile)
86 {
87  EST_Item *a_ptr;
88  char command[100], name[100], newname[100], sf[100];
89  FILE *fp;
90  strcpy(sf, sedfile);
91  EST_String file1, file2;
92  file1 = make_tmp_filename();
93  file2 = make_tmp_filename();
94 
95  fp = fopen(file1, "wb");
96  if (fp == NULL)
97  {
98  fprintf(stderr,"edit_labels: cannot open \"%s\" for writing\n",
99  (const char *)file1);
100  return -1;
101  }
102  for (a_ptr = a.head(); a_ptr != 0; a_ptr = a_ptr->next())
103  {
104  strcpy(name, a_ptr->name());
105  fprintf(fp, "%s\n", name);
106  }
107  fclose(fp);
108  strcpy(command, "cat ");
109  strcat(command, file1);
110  strcat(command, " | sed -f ");
111  strcat(command, sedfile);
112  strcat(command, " > ");
113  strcat(command, file2);
114 
115  printf("command: %s\n", command);
116  system(command);
117 
118  fp = fopen(file2, "rb");
119  if (fp == NULL)
120  {
121  fprintf(stderr,"edit_labels: cannot open \"%s\" for reading\n",
122  (const char *)file2);
123  return -1;
124  }
125  for (a_ptr = a.head(); a_ptr != 0; a_ptr = a_ptr->next())
126  {
127  fscanf(fp, "%s", newname);
128 // cout << "oldname: " << a_ptr->name() << " newname: " << newname << endl;
129  a_ptr->set_name(newname);
130  }
131  fclose(fp);
132  return 0;
133 }
134 
135 // make new EST_Relation from start and end points.
136 void extract(const EST_Relation &orig, float s,
137  float e, EST_Relation &ex)
138 {
139  EST_Item *a;
140  EST_Item *tmp;
141 
142  for (a = orig.head(); a != 0; a = a->next())
143  if ((a->F("end") > s) && (start(a) < e))
144  {
145  tmp = ex.append(a);
146  if ((a->F("end") > e))
147  tmp->set("end", e);
148  }
149 }
150 
151 void merge_all_label(EST_Relation &seg, const EST_String &labtype)
152 {
153  EST_Item *a_ptr, *n_ptr;
154  (void)labtype; // unused parameter
155 
156  for (a_ptr = seg.head(); a_ptr != seg.tail(); a_ptr = n_ptr)
157  {
158  n_ptr = a_ptr->next();
159  if (a_ptr->name() == a_ptr->next()->name())
160  seg.remove_item(a_ptr);
161  }
162 }
163 
164 void change_label(EST_Relation &seg, const EST_String &oname,
165  const EST_String &nname)
166 {
167  EST_Item *a_ptr;
168 
169  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = a_ptr->next())
170  if (a_ptr->name() == oname)
171  a_ptr->set_name(nname);
172 }
173 
174 void change_label(EST_Relation &seg, const EST_StrList &oname,
175  const EST_String &nname)
176 {
177  EST_Item *a_ptr;
178  EST_Litem *p;
179 
180  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = a_ptr->next())
181  for (p = oname.head(); p ; p = p->next())
182  if (a_ptr->name() == oname(p))
183  a_ptr->set_name(nname);
184 }
185 
186 static int is_in_class(const EST_String &name, EST_StrList &s)
187 {
188  EST_Litem *p;
189 
190  for (p = s.head(); p; p = p->next())
191  if (name == s(p))
192  return TRUE;
193 
194  return FALSE;
195 }
196 
197 int check_vocab(EST_Relation &a, EST_StrList &vocab)
198 {
199  EST_Item *s;
200  for (s = a.head(); s; s = s->next())
201  if (!is_in_class(s->name(), vocab))
202  {
203  cerr<<"Illegal entry in file " <<a.name()<< ":\"" << *s << "\"\n";
204  return -1;
205  }
206  return 0;
207 }
208 
209 void convert_to_broad_class(EST_Relation &seg, const EST_String &class_type,
210  EST_Option &options)
211 {
212  // class_type contains a list of whitepsace separated segment names.
213  // This function looks at each segment and adds a feature "pos"
214  // if its name is contained in the list.
215  EST_String tmp_class_type = class_type + "_list";
216  EST_String bc_list(options.val(tmp_class_type, 1));
217  EST_StrList pos_list;
218  EST_TokenStream ts;
219 
220  ts.open_string(bc_list);
221  while (!ts.eof())
222  pos_list.append(ts.get().string());
223 
224  convert_to_broad(seg, pos_list);
225 }
226 
227 void convert_to_broad(EST_Relation &seg, EST_StrList &pos_list,
228  EST_String broad_name, int polarity)
229 {
230  EST_Item *a_ptr;
231  if (broad_name == "")
232  broad_name = "pos";
233 
234  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = a_ptr->next())
235  if (is_in_class(a_ptr->name(), pos_list))
236  a_ptr->set(broad_name, (polarity) ? 1 : 0);
237  else
238  a_ptr->set(broad_name, (polarity) ? 0 : 1);
239 }
240 
241 void label_map(EST_Relation &seg, EST_Option &map)
242 {
243  EST_Item *p, *n;
244 
245  for (p = seg.head(); p != 0; n = p)
246  {
247  n = p->next();
248  if (map.present(p->name()))
249  {
250  if (map.val(p->name()) == "!DELETE")
251  seg.remove_item(p);
252  else
253  p->set_name(map.val(p->name()));
254  }
255 
256  }
257 }
258 
259 void shift_label(EST_Relation &seg, float shift)
260 {
261  //shift every end time by adding x seconds.
262  EST_Item *a_ptr;
263 
264  for (a_ptr = seg.head(); a_ptr != 0; a_ptr = a_ptr->next())
265  a_ptr->set("end", a_ptr->F("end") + shift);
266 }
267 
268 void RelationList_select(EST_RelationList &mlf, EST_StrList filenames, bool
269  exact_match)
270 {
271  // select only files in 'filenames'
272  // remove all others from mlf
273  EST_Litem *fptr, *ptr;
274  bool flag;
275 
276  // if not exact match, only match basenames
277  EST_StrList tmp_filenames;
278  for (ptr = filenames.head(); ptr != NULL; ptr = ptr->next())
279  if(exact_match)
280  tmp_filenames.append( filenames(ptr) );
281  else
282  tmp_filenames.append( basename(filenames(ptr)) );
283 
284  for(fptr=mlf.head(); fptr != NULL;)
285  {
286  flag=false;
287  for (ptr = tmp_filenames.head(); ptr != NULL; ptr = ptr->next())
288  if(exact_match)
289  {
290  if(tmp_filenames(ptr) == mlf(fptr).name())
291  {
292  flag=true;
293  break;
294  }
295  }
296  else if(mlf(fptr).name().contains(tmp_filenames(ptr)))
297  {
298  flag=true;
299  break;
300  }
301 
302  if(!flag)
303  {
304  fptr = mlf.remove(fptr);
305 
306  if(fptr==0) // must have removed head of list
307  fptr=mlf.head();
308  else
309  fptr=fptr->next();
310  }
311  else
312  fptr=fptr->next();
313  }
314  tmp_filenames.clear();
315 }
316 
317 // look for a single file called "filename" and make a EST_Relation out of
318 // this
319 EST_Relation RelationList_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
320 {
321 
322  EST_Litem *p;
323  EST_String test, ref;
324 
325  if (base)
326  for (p = mlf.head(); p; p = p->next())
327  {
328  if (basename(mlf(p).name(), "*")==basename(filename, "*"))
329  return mlf(p);
330  }
331  else
332  for (p = mlf.head(); p; p = p->next())
333  {
334  if (basename(mlf(p).name()) == filename)
335  return mlf(p);
336  }
337 
338  cerr << "No match for file " << filename << " found in mlf\n";
339  EST_Relation d;
340  return d;
341 }
342 
343 // combine all relation in MLF into a single relation.
344 EST_Relation RelationList_combine(EST_RelationList &mlf)
345 {
346  EST_Litem *p;
347  EST_Relation all;
348  EST_Item *s, *t = 0;
349  float last = 0.0;
350 
351  for (p = mlf.head(); p; p = p->next())
352  {
353  for (s = mlf(p).head(); s; s = s->next())
354  {
355  t = all.append();
356  t->set("name", s->S("name"));
357  t->set("end", s->F("end") + last);
358  cout << "appended t " << t << endl;
359  }
360  last = (t != 0) ? t->F("end") : 0.0;
361  }
362  return all;
363 }
364 
365 EST_Relation RelationList_combine(EST_RelationList &mlf, EST_Relation &key)
366 {
367  EST_Litem *p;
368  EST_Relation all;
369  EST_Item *s, *t = 0, *k;
370  float st;
371 
372  if (key.length() != mlf.length())
373  {
374  cerr << "RelationList has " << mlf.length() << " elements: expected "
375  << key.length() << " from key file\n";
376  return all;
377  }
378 
379  for (k = key.head(), p = mlf.head(); p; p = p->next(), k = k->next())
380  {
381  st = start(k);
382  for (s = mlf(p).head(); s; s = s->next())
383  {
384  t = all.append();
385  t->set("name", s->S("name"));
386  t->set("end", (s->F("end") + st));
387  }
388  }
389  return all;
390 }
391 
392 int relation_divide(EST_RelationList &slist, EST_Relation &lab,
393  EST_Relation &keylab,
394  EST_StrList &blank, EST_String ext)
395 { // divides a single relation into multiple chunks according to the
396  // keylab relation. If the keylab boundary falls in the middle of a label,
397  // the label is assigned to the chunk which has the most overlap with
398  // it. Some labels may be specified in the "blank" list which means thy
399  // are duplicated across boundaries.
400 
401  EST_Relation a, newkey;
402  EST_Item *s, *k, *t = 0, *n;
403  EST_String filename;
404  float kstart;
405 
406  slist.clear();
407 
408  if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
409  {
410  cerr << "Key file must extend beyond end of label file\n";
411  return -1;
412  }
413 
414  // find a the first keylab that will make a non-empty file
415  for (k = keylab.head(); k ; k = k->next())
416  if (k->F("end") > lab.head()->F("end"))
417  break;
418 
419  filename = (EST_String)k->f("file");
420  a.f.set("name", (filename + ext));
421  kstart = 0.0;
422 
423  for (s = lab.head(); s; s = s->next())
424  {
425  n = s->next();
426  if (n == 0)
427  {
428  t = a.append(s);
429  t->set("end", (s->F("end") - kstart));
430  break;
431  }
432  if (n->F("end") > k->F("end"))
433  {
434  if (((n->F("end") - k->F("end")) <
435  (k->F("end") - start(n))) ||
436  is_in_class(n->name(), blank))
437  {
438  a.append(s);
439  t->set("end", (s->F("end") - kstart));
440 
441  t = a.append(n);
442  t->set("end", (k->F("end") - kstart));
443 
444  if (!is_in_class(n->name(), blank))
445  s = s->next();
446  }
447  else
448  {
449  t = a.append(s);
450  t->set("end", (k->F("end") - kstart));
451  }
452 
453  slist.append(a);
454  k = k->next();
455  kstart = start(k);
456  a.clear();
457  filename = (EST_String)k->f("file");
458  a.f.set("name", (filename + ext));
459  }
460  else
461  {
462  t = a.append(s);
463  t->set("end", (s->F("end") - kstart));
464  }
465  }
466  slist.append(a);
467 
468  return 0;
469 }
470 
471 int relation_divide2(EST_RelationList &mlf, EST_Relation &lab,
472  EST_Relation &keylab, EST_String ext)
473 {
474  EST_Relation a, newkey;
475  EST_Item *s, *k, *t;
476  float kstart;
477 
478  mlf.clear();
479 
480  if ((keylab.tail())->F("end") < (lab.tail())->F("end"))
481  {
482  cerr << "Key file must extend beyond end of label file\n";
483  return -1;
484  }
485 
486  k = keylab.head();
487  a.f.set("name", (k->name() + ext));
488  kstart = 0.0;
489 
490  for (s = lab.head(); s; s = s->next())
491  {
492  t = a.append();
493  t->set_name(s->name());
494  t->set("end", (s->F("end") - kstart));
495 
496  if (s->F("end") > k->F("end"))
497  {
498  cout << "appending " << a;
499  mlf.append(a);
500 
501  kstart = s->F("end");
502  k->set("end", (s->F("end")));
503  k = k->next();
504  a.clear();
505  a.f.set("name", (k->name() + ext));
506  }
507  }
508  cout << "appending " << a;
509  mlf.append(a);
510 
511  return 0;
512 }
513 
514 
515 
516 
517 void map_match_times(EST_Relation &target, const EST_String &match_name,
518  const EST_String &time_name, bool do_start)
519 {
520  EST_Item *s, *t, *p;
521  float prev_end, inc, first_end, last_end;
522  int i;
523 
524  // first pass, copy times as appropriate, and find first
525  // and last defined ends
526  // This is hacky and certainly won't work for many cases
527 
528  first_end = -1.0;
529  prev_end = 0.0;
530  last_end = 0.0;
531 
532 // cout << "surface: " << surface << endl;
533 
534  for (s = target.head(); s; s = s->next())
535  {
536  if ((t = daughter1(s->as_relation(match_name))) != 0)
537  {
538  s->set(time_name + "end", t->F("end"));
539  if (do_start)
540  s->set(time_name + "start", t->F("start"));
541 
542  last_end = t->F("end");
543  if (first_end < 0.0)
544  first_end = t->F("end");
545  }
546  }
547 
548  if (!target.head()->f_present(time_name + "end"))
549  {
550  target.head()->set(time_name + "end", first_end / 2.0);
551  if (do_start)
552  target.head()->set(time_name + "start", 0.0);
553  }
554 
555  if (!target.tail()->f_present(time_name + "end"))
556  {
557  target.tail()->set(time_name + "end", last_end + 0.01);
558  if (do_start)
559  target.tail()->set(time_name + "start", last_end);
560  }
561 
562  for (s = target.head(); s; s = s->next())
563  {
564  if (!s->f_present(time_name + "end"))
565  {
566 // cout << "missing end feature for " << *s << endl;
567  for (i = 1, p = s; p; p = p->next(), ++i)
568  if (p->f_present(time_name + "end"))
569  break;
570  inc = (p->F(time_name + "end") - prev_end) / ((float) i);
571 // cout << "inc is : " << inc << endl;
572 
573 // cout << "stop phone is " << *p << endl;
574 
575  for (i = 1; s !=p ; s = s->next(), ++i)
576  {
577  s->set(time_name + "end", (prev_end + ((float) i * inc)));
578  if (do_start)
579  s->set(time_name + "start", (prev_end+((float) (i - 1 )* inc)));
580  }
581  }
582  prev_end = s->F("end");
583  }
584 }
585 
586 void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
587  const EST_String &target_name,
588  const EST_String &time_name,
589  bool do_start)
590 {
591  utt.create_relation("Match");
592 
593  dp_match(*utt.relation(target_name), *utt.relation(source_name),
594  *utt.relation("Match"), 7.0, 7.0, 7.0);
595 
596  map_match_times(*utt.relation(target_name), "Match", time_name, do_start);
597 }
598 
599 
600 EST_Litem *RelationList_ptr_extract(EST_RelationList &mlf, const EST_String &filename, bool base)
601 {
602  EST_Litem *p;
603  EST_String test, ref;
604 
605  if (base)
606  for (p = mlf.head(); p; p = p->next())
607  {
608  if (basename(mlf(p).name(), "*")==basename(filename, "*"))
609  return p;
610  }
611  else
612  for (p = mlf.head(); p; p = p->next())
613  if (mlf(p).name() == filename)
614  return p;
615 
616  cerr << "No match for file " << filename << " found in mlf\n";
617  return 0;
618 }
619 
620 void relation_convert(EST_Relation &lab, EST_Option &al, EST_Option &op)
621 {
622  if (al.present("-shift"))
623  shift_label(lab, al.fval("-shift"));
624 
625  // fix option later.
626  if (al.present("-extend"))
627  al.override_fval("-length",
628  al.fval("-extend",0) * lab.tail()->F("end"));
629 
630  // quantize (ie round up or down) label times
631  if (al.present("-q"))
632  quantize(lab, al.fval("-q"));
633 
634  if (al.present("-start"))
635  {
636  if (!al.present("-end"))
637  cerr << "-start option must be used with -end option\n";
638  else
639  extract(lab, al.fval("-start"), al.fval("-end"), lab);
640  }
641 
642  if (al.present("-class"))
643  convert_to_broad_class(lab, al.val("-class"), op);
644 
645  else if (al.present("-pos"))
646  {
647  EST_StrList bclass;
648  StringtoStrList(al.val("-lablist"), bclass);
649  convert_to_broad(lab, bclass);
650  }
651  else if (al.present("-sed"))
652  edit_labels(lab, al.val("-sed"));
653  else if (al.present("-map"))
654  {
655  EST_Option map;
656  if (map.load(al.val("-map")) != format_ok)
657  return;
658  label_map(lab, map);
659  }
660 }
661 
662 
663 
664 void print_relation_features(EST_Relation &stream)
665 {
666  EST_Item *s;
668 
669  for (s = stream.head(); s; s = s->next())
670  {
671  cout << s->name() << "\t:";
672  for(p.begin(s->features()); p; ++p)
673  cout << p->k << " "
674  << p->v << "; ";
675  cout << endl;
676  }
677 
678 }
679 
680 
681 void build_RelationList_hash_table(EST_RelationList &mlf,
682  EST_hashedRelationList &hash_table,
683  const bool base)
684 {
685  EST_Litem *p;
686  if (base)
687  for (p = mlf.head(); p; p = p->next())
688  hash_table.add_item(basename(mlf(p).name(), "*"),
689  &(mlf(p)));
690  else
691  for (p = mlf.head(); p; p = p->next())
692  hash_table.add_item(mlf(p).name(),
693  &(mlf(p)));
694 }
695 
696 
697 bool hashed_RelationList_extract(EST_Relation* &rel,
698  const EST_hashedRelationList &hash_table,
699  const EST_String &filename, bool base)
700 {
701  EST_Relation *d;
702  EST_String fname = filename;
703  int found;
704 
705  if (base)
706  fname=basename(filename, "*");
707 
708  d=hash_table.val(fname,found);
709 
710  if(found)
711  {
712  rel = d;
713  return true;
714  }
715  cerr << "No match for file " << fname << " found in mlf\n";
716  return false;
717 }
718 
719