Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
wfst_train_main.cc
1 /*************************************************************************/
2 /* */
3 /* Language Technologies Institute */
4 /* Carnegie Mellon University */
5 /* Copyright (c) 1999 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : October 1999 */
35 /*-----------------------------------------------------------------------*/
36 /* A training method for splitting states in a WFST from data */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <iostream>
42 #include <fstream>
43 #include <cstring>
44 #include "EST.h"
45 #include "EST_simplestats.h"
46 #include "EST_WFST.h"
47 
48 LISP load_string_data(EST_WFST &wfst,EST_String &filename);
49 void wfst_train(EST_WFST &wfst, LISP data);
50 
51 static int wfst_train_main(int argc, char **argv);
52 
53 /** @name <command>wfst_train</command> <emphasis>Train a weighted finite-state transducer</emphasis>
54  @id wfst-train-manual
55  * @toc
56  */
57 
58 //@{
59 
60 
61 /**@name Synopsis
62  */
63 //@{
64 
65 //@synopsis
66 
67 /**
68 This takes an existing WFST and data, and splits states in an
69 entropy-reducing way to produce a new WFST that better models the given data.
70 
71  */
72 
73 //@}
74 
75 /**@name OPTIONS
76  */
77 //@{
78 
79 //@options
80 
81 //@}
82 
83 
84 int main(int argc, char **argv)
85 {
86 
87  wfst_train_main(argc,argv);
88 
89  exit(0);
90  return 0;
91 }
92 
93 static int wfst_train_main(int argc, char **argv)
94 {
95  // Train a WFST from data building new states
96  EST_Option al;
97  EST_StrList files;
98  EST_String wfstfile;
99  FILE *ofd;
100 
101  parse_command_line
102  (argc, argv,
103  EST_String("[WFSTFILE] [input file0] ... [-o output file]\n")+
104  "Summary: Train a WFST on data\n"+
105  "-wfst <ifile> The WFST to start from\n"+
106  "-data <ifile> Sentences in the language recognised by WFST\n"+
107  "-o <ofile> Output file for trained WFST\n"+
108  "-heap <int> {210000}\n"+
109  " Set size of Lisp heap, needed for large rulesets\n",
110  files, al);
111 
112  if (al.present("-o"))
113  {
114  if ((ofd=fopen(al.val("-o"),"w")) == NULL)
115  EST_error("can't open output file for writing \"%s\"",
116  (const char *)al.val("-o"));
117  }
118  else
119  ofd = stdout;
120 
121  if (al.present("-wfst"))
122  wfstfile = al.val("-wfst");
123  else
124  EST_error("no WFST specified");
125 
126  siod_init(al.ival("-heap"));
127  siod_est_init();
128 
129  EST_WFST wfst;
130  LISP data;
131 
132  if (wfst.load(wfstfile) != format_ok)
133  EST_error("failed to read WFST from \"%s\"",
134  (const char *)wfstfile);
135 
136  data = load_string_data(wfst,al.val("-data"));
137 
138  wfst_train(wfst,data);
139 
140  if (wfst.save(al.val("-o")) != write_ok)
141  EST_error("failed to write trained WFST to \"%s\"",
142  (const char *)al.val("-o"));
143 
144  return 0;
145 
146 }
147