Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
dp_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author: Simon King */
34 /* Date : May 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Simple dynamic programming */
37 /* e.g. for aligning pronunciations */
38 /*=======================================================================*/
39 
40 #include <cstdlib>
41 #include <cstdio>
42 #include <cmath>
43 #include "EST.h"
44 
45 EST_read_status load_TList_of_StrVector(EST_TList<EST_StrVector> &w,
46  const EST_String &filename,
47  const int vec_len);
48 
49 typedef
50 float (*local_cost_function)(const EST_Item *item1,
51  const EST_Item *item2);
52 
53 typedef
54 bool (*local_pruning_function)(const int i,
55  const int j,
56  const int max_i,
57  const int max_j);
58 
59 bool dp_match(const EST_Relation &lexical,
60  const EST_Relation &surface,
61  EST_Relation &match,
62  local_cost_function lcf,
63  local_pruning_function lpf,
64  EST_Item *null_sym);
65 
66 bool dp_match(const EST_Relation &lexical,
67  const EST_Relation &surface,
68  EST_Relation &match,
69  local_cost_function lcf,
70  EST_Item *null_sym);
71 
72 float local_cost(const EST_Item *s1, const EST_Item *s2);
73 bool local_prune(const int i, const int j,
74  const int max_i,const int max_j);
75 static void load_vocab(const EST_String &vfile);
76 static EST_Item *null_sym;
77 //static EST_String deleted_marker = "<del>";
78 //static EST_String inserted_marker = "<ins>";
79 static bool show_cost=FALSE;
80 static int prune_width = 100;
81 //static float path_cost;
82 int StrVector_index(const EST_StrVector &v, const EST_String &s);
83 
84 EST_StrList pattern1_l, pattern2_l, vocab_l;
85 EST_StrVector pattern1, pattern2, vocab;
86 
87 EST_FMatrix DP_substitution_cost;
88 EST_FVector DP_deletion_cost;
89 EST_FVector DP_insertion_cost;
90 EST_IMatrix DP_path_i,DP_path_j;
91 EST_FMatrix cost_matrix;
92 
93 // this are local distance measures,
94 // NOT the 1/2/1 weights in the DP recursion formula !
95 float insertion_cost = 1;
96 float deletion_cost = 1;
97 float substitution_cost = 1;
98 
99 
100 // two possibilities :
101 // 1. simple insertion/deletion/substitution penalties
102 // 2. matrix of distances between all pairs of vocab items,
103 // where vocab includes some null symbol for insertions/deletions
104 
105 EST_String distance_measure = "simple"; // could be "matrix"
106 
107 
108 
109 
110 /** @name <command>dp</command> <emphasis> Perform dynamic programming on label sequences</emphasis>
111  * @id dp-manual
112  * @toc
113  */
114 
115 //@{
116 
117 /**@name Synopsis
118  */
119 //@{
120 
121 //@synopsis
122 
123 /**
124 dp provides simple dynamic programming to find the lowest cost
125 alignment of two symbol sequences. Possible uses include:
126 
127 <itemizedlist>
128 <listitem><para>Label alignment (e.g. speech recogniser output scoring) </para></listitem>
129 </itemizedlist>
130 
131 The costs of inserting/deleting/substituting symbols can be given
132 either with command line options, or as a file containing a full
133 matrix of costs. In the former case, all insertions will have the same
134 cost. In the latter case, each row of the file contains the cost of
135 replacing a symbol in sequence 1 with each possible symbol in the
136 vocabulary to align with sequence 2, including a special "place
137 holder" symbol used for insertions/deletions. See the examples
138 below. The place holder can be redefined.
139 
140 The output is an EST utterance with three Relations: the first two are
141 the input sequences, and the third shows the alignment found by
142 dynamic programming.
143 
144 */
145 
146 //@}
147 
148 /**@name Options
149  */
150 //@{
151 
152 //@options
153 
154 //@}
155 
156 
157 int main(int argc, char **argv)
158 {
159  EST_StrList files;
160  EST_Option al;
161  EST_String out_file;
162  //int i;
163  EST_Relation *path1, *path2, *match;
164 
165  null_sym = new EST_Item;
166  null_sym->set_name("<null>");
167 
168  parse_command_line(argc, argv,
169  EST_String("Usage:\n")+
170  "dp <options> \"pattern 1\" \"pattern 2\"\n"+
171  "Find the best alignment of a pair of symbol sequences (e.g. word pronuciations).\n"+
172  "-vocab <string> file containing vocabulary\n"+
173  "-place_holder <string> which vocab item is the place holder (default is " + null_sym->name() + " )\n"+
174  "-show_cost show cost of matching path\n"+
175  "-o <string> output file\n"+
176  "-p <int> 'beam' width\n"+
177  "Either:\n"+
178  "-i <float> insertion cost\n"+
179  "-d <float> deletion cost\n"+
180  "-s <float> substitution cost\n"+
181  "Or:\n"+
182  "-cost_matrix <string> file containing cost matrix\n",
183  files, al);
184 
185  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
186 
187  if (al.present("-vocab"))
188  load_vocab(al.val("-vocab"));
189  else
190  {
191  cerr << argv[0] << ": no vocab file specified" << endl;
192  exit(-1);
193  }
194 
195  if (al.present("-p"))
196  prune_width = al.ival("-p");
197 
198  if (al.present("-cost_matrix"))
199  {
200  if(al.present("-i") || al.present("-d") || al.present("-s") )
201  {
202  cerr << "Can't have ins/del/subs costs as well as matrix !" << endl;
203  exit(-1);
204  }
205  distance_measure="matrix";
206  cost_matrix.load(al.val("-cost_matrix"));
207 
208  if(al.present("-place_holder"))
209  null_sym->set_name(al.val("-place_holder"));
210 
211  if(StrVector_index(vocab,null_sym->name()) < 0)
212  {
213  cerr << "The place holder symbol '" << null_sym->name();
214  cerr << "' is not in the vocbulary !" << endl;
215  exit(-1);
216  }
217 
218  if(cost_matrix.num_columns() != vocab.length())
219  {
220  cerr << "Cost matrix number of columns must match vocabulary size !" << endl;
221  exit(-1);
222  }
223  if(cost_matrix.num_rows() != vocab.length())
224  {
225  cerr << "Cost matrix number of rows must match vocabulary size !" << endl;
226  exit(-1);
227  }
228 
229  }
230  else if(al.present("-i") && al.present("-d") && al.present("-s") )
231  {
232  insertion_cost = al.fval("-i");
233  deletion_cost = al.fval("-d");
234  substitution_cost = al.fval("-s");
235  }
236  else
237  {
238  cerr << "Must give either ins/del/subs costs or cost matrix !" << endl;
239  exit(-1);
240  }
241 
242 
243  if(al.present("-show_cost"))
244  show_cost=TRUE;
245 
246  if(files.length() != 2)
247  {
248  cerr << "Must give 2 patterns !" << endl;
249  exit(-1);
250  }
251 
252  StringtoStrList(files(files.head()),pattern1_l," ");
253  StringtoStrList(files(files.head()->next()),pattern2_l," ");
254 
255  EST_Utterance utt;
256  path1 = utt.create_relation("Lexical");
257  path2 = utt.create_relation("Surface");
258  match = utt.create_relation("Match");
259 
260  EST_Litem *p;
261  for(p=pattern1_l.head();p != 0; p=p->next())
262  {
263  if( StrVector_index(vocab,pattern1_l(p)) < 0)
264  {
265  cerr << pattern1_l(p) << " is not in the vocabulary !" << endl;
266  exit(1);
267  }
268  EST_Item new_item;
269  new_item.set_name(pattern1_l(p));
270  path1->append(&new_item);
271  }
272 
273  for(p=pattern2_l.head();p != 0; p=p->next())
274  {
275  if( StrVector_index(vocab,pattern2_l(p)) < 0)
276  {
277  cerr << pattern2_l(p) << " is not in the vocabulary !" << endl;
278  exit(1);
279  }
280  EST_Item new_item;
281  new_item.set_name(pattern2_l(p));
282  path2->append(&new_item);
283  }
284 
285  // to do : check all items in two patterns are in vocab
286  // .....
287 
288 
289 // cerr << "MATCHING..." << endl;
290  if(!dp_match(*path1,*path2,*match,
291  local_cost,local_prune,null_sym))
292 // cerr << "OK !" << endl;
293 // else
294  cerr << "No match could be found." << endl;
295 
296 
297  utt.save(out_file);
298 
299 
300  return 0;
301 }
302 
303 
304 static void load_vocab(const EST_String &vfile)
305 {
306  // Load vocabulary (strings)
307  EST_TokenStream ts;
308 
309  if (ts.open(vfile) == -1)
310  {
311  cerr << "can't find vocab file \"" << vfile << "\"" << endl;
312  exit(-1);
313  }
314 
315  while (!ts.eof())
316  if (ts.peek() != "")
317  vocab_l.append(ts.get().string());
318 
319  ts.close();
320 
321  StrList_to_StrVector(vocab_l,vocab);
322 }
323 
324 
325 float local_cost(const EST_Item *s1, const EST_Item *s2)
326 {
327  // N.B. cost is not necessarily zero for matching symbols
328 
329  //cerr << "lcf " << s1 << "," << s2 << endl;
330 
331  // otherwise cost is either insertion cost, or cost_matrix value
332  if(distance_measure == "simple")
333  {
334  if(s1->name() == s2->name())
335  return 0;
336  else
337  {
338  if(s1 == null_sym)
339  return insertion_cost;
340  else if(s2 == null_sym)
341  return deletion_cost;
342  else
343  return substitution_cost;
344  }
345  }
346 
347  //cerr << "Cost of replacing " << s1 << " with " << s2 << " is ";
348  //cerr << cost_matrix(StrVector_index(vocab,s1),StrVector_index(vocab,s2)) << endl;
349 
350  return cost_matrix(StrVector_index(vocab,s1->name()),
351  StrVector_index(vocab,s2->name()));
352 
353 }
354 
355 bool local_prune(const int i, const int j,
356  const int max_i, const int max_j)
357 {
358 
359  // keep it simple :
360  // if we stray too far from the diagonal - PRUNE !
361 
362  float scale = (float)max_i / (float)max_j;
363 
364  float near_j = (float)i / scale;
365  float near_i = (float)j * scale;
366 
367  /*
368  cerr << "prune i: " << i << " " << near_i - (float)i
369  << " " << abs(near_i - (float)i)<< endl;
370  cerr << "prune j: " << j << " " << near_j - (float)j
371  << " " << abs(near_j - (float)j) << endl;
372  */
373 
374  if( (abs((int)(near_i - (float)i)) > prune_width) ||
375  (abs((int)(near_j - (float)j)) > prune_width) )
376  {
377  //cerr << "lpf " << i << "," << j << " true" << endl;
378  return TRUE;
379  }
380 
381  return FALSE;
382 
383 }
384 
385 
386 /**@name Examples
387 
388 <para>
389 Align two symbol sequences:
390 </para>
391 
392 <para>
393 <screen>
394 $ dp -vocab vocab.file "a b c" "a b d c" -i 1 -d 2 -s 3
395 </screen>
396 </para>
397 
398 <para>
399 where vocab.file contains "a b c d"
400 </para>
401 
402 <para>
403 Or, using a full cost matrix:
404 </para>
405 
406 <para>
407 <screen>
408 $ dp -vocab vocab2.file -cost_matrix foo "a b c" "a b d c"
409 </screen>
410 </para>
411 
412 <para>
413 where vocab2.file contains "a b c d <null>" and the file foo contains:
414 </para>
415 
416 <para>
417 <screen>
418 <para>0 3 3 3 2</para>
419 <para>3 0 3 3 2</para>
420 <para>3 3 0 3 2</para>
421 <para>3 3 3 0 2</para>
422 <para>1 1 1 1 0</para>
423 </screen>
424 </para>
425 
426 <para> Each row of foo shows the cost of replacing an input symbol
427 with each symbol in the vocabulary to match an output symbol. Each row
428 corresponds to an item in the vocabulary (in the order they appear in
429 the vocabulary file). In the example, replacing 'a' with 'a' costs 0,
430 replacing 'a' with any of 'b' 'c' or 'd' costs 3 (a substitution), and
431 replacing 'a' with the place holder symbol 'null' costs 2 (a
432 deletion). The cost of replacing 'null' with anything other than
433 'null' costs 1 (an insertion). The costs of 1,2 and 3 used here are
434 only for illustration. The cost matrix meed not have the form above -
435 for example, replacing 'a' with 'a' need not cost 0. The entries in
436 foo are read as floats. </para>
437 
438 */
439 //@{
440 //@}