Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
align_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : September 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* Simple alignment scoring program to give number of insertions, */
37 /* deletions, errors between a string of symbols and a reference string */
38 /* of symbols */
39 /* */
40 /*=======================================================================*/
41 #include <cstdlib>
42 #include <cstdio>
43 #include <iostream>
44 #include <fstream>
45 #include <cstring>
46 #include "EST.h"
47 #include "EST_WFST.h"
48 
49 static int align_main(int argc, char **argv);
50 static void nisttool_align(EST_Option &al);
51 static void string_align(EST_Option &al);
52 static void align_score(EST_Utterance &u, const EST_String &refrel,
53  const EST_String &hyporel,
54  const EST_String &alignrel,
55  int &total,int &ins,int &del,int &sub,int &correct);
56 static int name_distance(EST_Item *r,EST_Item *h);
57 void align(EST_Utterance &utt,
58  const EST_String &refrel,
59  const EST_String &hyporel,
60  const EST_String &alignrel);
61 static void load_sentence(EST_Utterance &u, const EST_String &relname,
62  EST_TokenStream &ts);
63 static void load_sentence(EST_Utterance &u, const EST_String &relname,
64  EST_String &relval);
65 
66 /** @name <command>align</command> <emphasis>align stream with reference stream</emphasis>
67  @id align-manual
68  * @toc
69  */
70 
71 //@{
72 
73 
74 /**@name Synopsis
75  */
76 //@{
77 
78 //@synopsis
79 
80 /**
81 
82  */
83 
84 //@}
85 
86 /**@name OPTIONS
87  */
88 //@{
89 
90 //@options
91 
92 //@}
93 int main(int argc, char **argv)
94 {
95 
96  align_main(argc,argv);
97 
98  exit(0);
99  return 0;
100 }
101 
102 static int align_main(int argc, char **argv)
103 {
104  // Top level function generates a WFST from rules
105  EST_Option al;
106  EST_StrList files;
107  EST_String outfile;
108  EST_String format;
109 
110  parse_command_line
111  (argc, argv,
112  EST_String("[options] ...\n")+
113  "Summary: align an hypothesis with a reference string\n"+
114  "-rfile <ifile> Reference file\n"+
115  "-hfile <ifile> Hypothesis file\n"+
116  "-rstring <string> Reference string\n"+
117  "-hstring <string> Hypothesis string\n"+
118  "-format <string>\n"+
119  " Supported formats: strings, nisttool\n",
120  files, al);
121 
122  if (al.present("-o"))
123  outfile = al.val("-o");
124  else
125  outfile = "-";
126 
127  if (al.present("-format"))
128  format = al.val("-format");
129  else
130  format = "strings";
131 
132  if (format == "strings")
133  string_align(al);
134  else if (format == "nisttool")
135  nisttool_align(al);
136  else
137  cout << "Unknown or unhandled format: " << format << endl;
138 
139  return 0;
140 }
141 
142 bool dp_match(const EST_Relation &lexical,
143  const EST_Relation &surface,
144  EST_Relation &match,
145  float ins, float del, float sub);
146 
147 static void string_align(EST_Option &al)
148 {
149  EST_String refStr = al.val("-rstring");
150  EST_String hypStr = al.val("-hstring");
151  EST_Utterance u;
152  int total,ins,del,sub,correct;
153 
154  load_sentence(u,"ref",refStr);
155  load_sentence(u,"hypo",hypStr);
156  align(u,"ref","hypo","align");
157  align_score(u,"ref","hypo","align",total,ins,del,sub,correct);
158  fprintf(stdout,"words %d\n",total);
159  fprintf(stdout,"insertions %d\n",ins);
160  fprintf(stdout,"deletions %d\n",del);
161  fprintf(stdout,"substitutions %d\n",sub);
162  fprintf(stdout,"correct %d\n",correct);
163  fprintf(stdout,"WER %f\n",(100.0 * (float)(ins+del+sub))/total);
164 }
165 
166 static void nisttool_align(EST_Option &al)
167 {
168  // Using the format used by the NIST tools for alignment
169  // Sentence per line with parenthesized id at end
170  EST_String reffile = al.val("-rfile");
171  EST_String hypofile = al.val("-hfile");
172  EST_TokenStream rts,hts;
173  EST_Item *r, *h;
174  static EST_Regex id("^(.*)$");
175  int sents=0;
176  int total,ins,del,sub,correct;
177  int s_total,s_ins,s_del,s_sub,s_correct;
178 
179  rts.open(reffile);
180  hts.open(hypofile);
181  s_total=s_ins=s_del=s_sub=s_correct=0;
182 
183  while (!rts.eof())
184  {
185  EST_Utterance u;
186 
187  load_sentence(u,"ref",rts);
188  load_sentence(u,"hypo",hts);
189  r = u.relation("ref")->last();
190  h = u.relation("hypo")->last();
191  if ((!r->name().matches(id)) ||
192  (r->name() != h->name()))
193  {
194  cerr << "Align: failed to match sentence " <<
195  sents << " at id " << r->name() << endl;
196  }
197  else
198  {
199  // Ids aren't counted as words
200  r->unref_all();
201  h->unref_all();
202  align(u,"ref","hypo","align");
203  // This doesn't give exactly the same as the NIST tools
204  // even though it should (actually I think its better)
205 // dp_match(*u.relation("ref"),
206 // *u.relation("hypo"),
207 // *u.create_relation("align"),
208 // 3,3,4);
209  align_score(u,"ref","hypo","align",
210  total,ins,del,sub,correct);
211  s_total += total;
212  s_ins += ins;
213  s_del += del;
214  s_sub += sub;
215  s_correct += correct;
216  }
217  sents++;
218  }
219 
220  rts.close();
221  hts.close();
222  fprintf(stdout,"sentences %d\n",sents);
223  fprintf(stdout,"words %d\n",s_total);
224  fprintf(stdout,"insertions %d\n",s_ins);
225  fprintf(stdout,"deletions %d\n",s_del);
226  fprintf(stdout,"substitutions %d\n",s_sub);
227  fprintf(stdout,"correct %d\n",s_correct);
228  fprintf(stdout,"WER %f\n",(100.0 * (float)(s_ins+s_del+s_sub))/s_total);
229 }
230 
231 static void load_sentence(EST_Utterance &u,
232  const EST_String &relname,
233  EST_TokenStream &ts)
234 {
235  EST_Relation *r = u.create_relation(relname);
236 
237  do
238  {
239  EST_Item *i = r->append();
240  i->set_name(ts.get());
241  }
242  while ((!ts.eoln()) && (!ts.eof()));
243 }
244 
245 static void load_sentence(EST_Utterance &u,
246  const EST_String &relname,
247  EST_String &relval)
248 {
249  EST_Relation *r = u.create_relation(relname);
250  EST_StrList strlist;
251  StringtoStrList(relval, strlist, " ");
253 
254  for (iter.begin(strlist); iter; ++iter)
255  {
256  EST_Item *i = r->append();
257  i->set_name(*iter);
258  }
259 }
260 
261 static void align_score(EST_Utterance &u, const EST_String &refrel,
262  const EST_String &hyporel,
263  const EST_String &alignrel,
264  int &total,int &ins,int &del,int &sub,int &correct)
265 {
266  // Score alignment
267  EST_Item *ri,*hi;
268  total=ins=del=correct=sub=0;
269 
270  for (ri=u.relation(refrel)->first(),
271  hi=u.relation(hyporel)->first();
272  ri;
273  ri=ri->next(),hi=hi->next())
274  {
275  for ( ; (as(hi,alignrel) == 0) && hi ; hi=hi->next())
276  {
277  fprintf(stdout,"inserted: %s\n",(const char *)hi->name());
278  ins++;
279  }
280  for ( ; (daughter1(ri,alignrel) == 0) && ri; ri=ri->next())
281  {
282  fprintf(stdout,"deleted: %s\n",(const char *)ri->name());
283  del++;
284  }
285  if (!ri)
286  break;
287  if (name_distance(ri,daughter1(ri,alignrel)) == 0)
288  {
289  fprintf(stdout,"correct: %s\n",(const char *)ri->name());
290  correct++;
291  }
292  else
293  {
294  fprintf(stdout,"substituted: %s\n",(const char *)ri->name());
295  sub++;
296  }
297  }
298  // For trailing hypothesized (or ref is nil)
299  for ( ; hi ; hi=hi->next())
300  {
301  fprintf(stdout,"inserted: %s\n",(const char *)hi->name());
302  ins++;
303  }
304 
305  total = u.relation(refrel)->length();
306 
307 
308 // fprintf(stdout,"total %d ins %d del %d subs %d correct %d\n",
309 // total, ins, del, sub, correct);
310 }
311 
312 static int name_distance(EST_Item *r,EST_Item *h)
313 {
314  EST_String rname = r->name();
315  EST_String hname = h->name();
316  if ((rname == hname) ||
317  (downcase(rname) == downcase(hname)))
318  return 0;
319  else
320  return 1;
321 }
322 
323 void align(EST_Utterance &utt,
324  const EST_String &refrel,
325  const EST_String &hyporel,
326  const EST_String &alignrel)
327 {
328  // Align refrel to hyporel by alignrel
329  int r_size = utt.relation(refrel)->length();
330  int h_size = utt.relation(hyporel)->length();
331  EST_Item *ri = utt.relation(refrel)->first();
332  EST_Item *hi = utt.relation(hyporel)->first();
333  int i,j;
334  int insdel_cost = 3;
335  int subs_cost = 4;
336  float to_insert,to_del,to_subs;
337  float cost;
338 
339  EST_Relation *ar = utt.create_relation(alignrel);
340 
341  EST_FMatrix dpt(r_size+1,h_size+1);
342  EST_IMatrix dpp(r_size+1,h_size+1);
343 
344  // Initialise first row and column
345  dpt(0,0) = subs_cost * name_distance(ri,hi);
346  dpp(0,0) = 0;
347  for (i=1; i<r_size+1; i++)
348  {
349  dpt(i,0) = insdel_cost + dpt(i-1,0);
350  dpp(i,0) = -1; // deletion
351  }
352  for (j=1; j < h_size+1; j++)
353  {
354  dpt(0,j) = insdel_cost + dpt(0,j-1);
355  dpp(0,j) = 1; // insertion
356  }
357 
358  ri = utt.relation(refrel)->first();
359  for (i=1; ri; ri=ri->next(),i++)
360  {
361  ar->append(ri); // for use later
362  hi = utt.relation(hyporel)->first();
363  for (j=1; hi; hi=hi->next(),j++)
364  {
365  cost = name_distance(ri,hi);
366  to_insert = insdel_cost + dpt(i,j-1);
367  to_del = insdel_cost + dpt(i-1,j);
368  to_subs = (cost * subs_cost) + dpt(i-1,j-1);
369  if (to_insert < to_del)
370  {
371  if (to_insert < to_subs)
372  {
373  dpt(i,j) = to_insert;
374  dpp(i,j) = 1;
375  }
376  else
377  {
378  dpt(i,j) = to_subs;
379  dpp(i,j) = 0;
380  }
381  }
382  else
383  {
384  if (to_del < to_subs)
385  {
386  dpt(i,j) = to_del;
387  dpp(i,j) = -1;
388  }
389  else
390  {
391  dpt(i,j) = to_subs;
392  dpp(i,j) = 0;
393  }
394  }
395  }
396  }
397 
398 // for (i=1,ri=utt.relation(refrel)->first(); i < r_size+1; i++,ri=ri->next())
399 // {
400 // fprintf(stdout,"%10s ",(const char *)ri->name());
401 // for (j=1,hi=utt.relation(hyporel)->first(); j<h_size+1; j++,hi=hi->next())
402 // fprintf(stdout,"%3d/%2d ",(int)dpt(i,j),dpp(i,j));
403 // fprintf(stdout,"\n");
404 // }
405 
406  for (i=r_size,j=h_size,
407  ri=utt.relation(refrel)->last(),
408  hi=utt.relation(hyporel)->last();
409  ri; i--,ri=ri->prev())
410  {
411  while (dpp(i,j) == 1)
412  {
413  j--;
414 // fprintf(stdout,"skipping hi %s\n",
415 // (const char *)hi->name());
416  hi=hi->prev();
417  }
418  if (dpp(i,j) == 0)
419  {
420 // fprintf(stdout,"linking %s %s\n",
421 // (const char *)ri->name(),
422 // (const char *)hi->name());
423  append_daughter(ri,alignrel,hi);
424  j--;
425  hi=hi->prev();
426  }
427 // else
428 // fprintf(stdout,"skipping ri %s\n",
429 // (const char *)ri->name());
430  }
431 }