Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
ols_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : January 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Ordinary least squares */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cstring>
43 #include "EST_Wagon.h"
44 #include "EST_multistats.h"
45 #include "EST_cmd_line.h"
46 
47 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d);
48 static int ols_main(int argc, char **argv);
49 
50 
51 /** @name <command>ols</command> <emphasis>Train linear regression model</emphasis>
52  @id ols-manual
53  * @toc
54  */
55 
56 //@{
57 
58 
59 /**@name Synopsis
60  */
61 //@{
62 
63 //@synopsis
64 
65 /**
66  */
67 
68 //@}
69 
70 /**@name OPTIONS
71  */
72 //@{
73 
74 //@options
75 
76 //@}
77 
78 
79 int main(int argc, char **argv)
80 {
81  return ols_main(argc,argv);
82 }
83 
84 static int ols_main(int argc, char **argv)
85 {
86  // Top level function loads in sample data and finds coefficients
87  EST_Option al;
88  EST_StrList files;
89  EST_String ofile = "-";
90  WDataSet dataset,test_dataset;
91  EST_FMatrix coeffs;
92  EST_FMatrix X,Y,Xtest,Ytest;
93  LISP ignores = NIL;
94 
95  parse_command_line
96  (argc, argv,
97  EST_String("[options]\n")+
98  "Summary: Linear Regression by ordinary least squares (defaults in {})\n"+
99  "-desc <ifile> Field description file\n"+
100  "-data <ifile> Datafile, one vector per line\n"+
101  "-test <ifile> Datafile, for testing\n"+
102  "-robust Robust, may take longer\n"+
103  "-stepwise Order the features by contribution,\n"+
104  " implies robust.\n"+
105  "-swlimit <float> {0.0}\n"+
106  " Percentage necessary improvement for stepwise\n"+
107  "-quiet No summary\n"+
108  "-o <ofile> \n"+
109  "-output <ofile> Output file for coefficients\n"+
110  "-ignore <string> Filename or bracket list of fields to ignore\n",
111  files, al);
112 
113 
114  if (al.present("-output"))
115  ofile = al.val("-output");
116  if (al.present("-o"))
117  ofile = al.val("-o");
118 
119  siod_init();
120 
121  if (al.present("-ignore"))
122  {
123  EST_String ig = al.val("-ignore");
124  if (ig[0] == '(')
125  ignores = read_from_string(ig);
126  else
127  ignores = vload(ig,1);
128  }
129 
130  // Load in the data
131  if (!al.present("-desc"))
132  {
133  cerr << "ols: no description file specified\n";
134  return -1;
135  }
136  else
137  {
138  dataset.load_description(al.val("-desc"),ignores);
139  dataset.ignore_non_numbers();
140  }
141  if (!al.present("-data"))
142  {
143  cerr << "ols: no data file specified\n";
144  return -1;
145  }
146  else
147  wgn_load_dataset(dataset,al.val("-data"));
148  if (al.present("-test"))
149  {
150  test_dataset.load_description(al.val("-desc"),ignores);
151  test_dataset.ignore_non_numbers();
152  wgn_load_dataset(test_dataset,al.val("-test"));
153  load_ols_data(Xtest,Ytest,test_dataset);
154  }
155  else
156  // No test data specified so use training data
157  load_ols_data(Xtest,Ytest,dataset);
158 
159  load_ols_data(X,Y,dataset);
160 
161  if (al.present("-stepwise"))
162  {
163  EST_StrList names;
164  float swlimit = al.fval("-swlimit");
165  EST_IVector included;
166  int i;
167 
168  names.append("Intercept");
169  for (i=1; i < dataset.width(); i++)
170  names.append(dataset.feat_name(i));
171 
172  included.resize(X.num_columns());
173  included[0] = TRUE; // always guarantee interceptor
174  for (i=1; i<included.length(); i++)
175  {
176  if (dataset.ignore(i) == TRUE)
177  included.a_no_check(i) = OLS_IGNORE;
178  else
179  included.a_no_check(i) = FALSE;
180  }
181 
182  if (!stepwise_ols(X,Y,names,swlimit,coeffs,Xtest,Ytest,included))
183  {
184  cerr << "OLS: failed stepwise ols" << endl;
185  return -1;
186  }
187  }
188  else if (al.present("-robust"))
189  {
190  EST_IVector included;
191  int i;
192 
193  included.resize(X.num_columns());
194  included[0] = TRUE; // always guarantee interceptor
195  for (i=1; i<included.length(); i++)
196  {
197  if (dataset.ignore(i) == TRUE)
198  included.a_no_check(i) = OLS_IGNORE;
199  else
200  included.a_no_check(i) = TRUE;
201  }
202 
203  if (!robust_ols(X,Y,included,coeffs))
204  {
205  cerr << "OLS: failed robust ols" << endl;
206  return -1;
207  }
208  }
209  else if (!ols(X,Y,coeffs))
210  {
211  cerr << "OLS: failed no pseudo_inverse" << endl;
212  return -1;
213  }
214 
215  if (coeffs.save(ofile) != write_ok)
216  {
217  cerr << "OLS: failed to save coefficients in \"" << ofile << "\""
218  << endl;
219  return -1;
220  }
221 
222  if (!al.present("-quiet"))
223  {
224  EST_FMatrix pred;
225  float cor,rmse;
226 
227  ols_apply(Xtest,coeffs,pred);
228  ols_test(Ytest,pred,cor,rmse);
229 
230  printf(";; RMSE %f Correlation is %f\n",rmse,cor);
231  }
232 
233  return 0;
234 }
235 
236 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d)
237 {
238  EST_Litem *p;
239  int n,m;
240 
241  X.resize(d.length(),d.width());
242  Y.resize(d.length(),1);
243 
244  for (n=0,p=d.head(); p != 0; p=p->next(),n++)
245  {
246  Y.a_no_check(n,0) = d(p)->get_flt_val(0);
247  X.a_no_check(n,0) = 1;
248  for (m=1; m < d.width(); m++)
249  {
250  if (d.ignore(m))
251  {
252  X.a_no_check(n,m) = 0;
253  }
254  else
255  X.a_no_check(n,m) = d(p)->get_flt_val(m);
256  }
257  }
258 
259 }