Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_FeatureData.cc
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* */
34 /* Author: Paul Taylor Caley */
35 /* Date: July 1998 */
36 /* -------------------------------------------------------------------- */
37 /* Feature Data Class */
38 /* */
39 /************************************************************************/
40 
41 #include "EST_TMatrix.h"
42 #include "EST_Val.h"
43 #include "EST_FeatureData.h"
44 #include "EST_string_aux.h"
45 #include "EST_Token.h"
46 #include "EST_FileType.h"
47 #include "EST_error.h"
48 #include <iostream>
49 #include <fstream>
50 
51 #include "EST_THash.h"
52 
53 
54 EST_FeatureData::EST_FeatureData()
55 {
56  default_vals();
57 }
58 
59 
60 
61 EST_FeatureData::EST_FeatureData(const EST_FeatureData &a)
62 {
63  default_vals();
64  copy(a);
65 }
66 
67 EST_FeatureData::~EST_FeatureData(void)
68 {
69 }
70 
71 int EST_FeatureData::num_samples() const
72 {
73  return fd.num_rows();
74 }
75 
76 int EST_FeatureData::num_features() const
77 {
78  return fd.num_columns();
79 }
80 
81 
82 void EST_FeatureData::default_vals()
83 {
84 /* cout << "Default values\n";
85  p_sub_fd = false;
86  p_info = new EST_FeatureInfo;
87 */
88 }
89 
90 void EST_FeatureData::set_num_samples(int num_samples, bool preserve)
91 {
92  fd.resize(num_samples, fd.num_columns(), preserve);
93 }
94 
95 void EST_FeatureData::resize(int num_samples, int num_features, bool preserve)
96 {
97  // If enlargement is required, give new features dummy names
98  // and set their types to <STRING>. If preserve is set to 0
99  // rename all features this way.
100 
101  if (num_features > fd.num_columns())
102  {
103  int i;
104  if (preserve)
105  i = fd.num_columns();
106  else
107  i = 0;
108  for (; i < num_features; ++i)
109  info.set("unnamed_" + itoString(i), "<STRING>");
110  }
111 
112  fd.resize(num_samples, num_features, preserve);
113 }
114 
115 void EST_FeatureData::resize(int num_samples, EST_Features &f, bool preserve)
116 {
117  fd.resize(num_samples, f.length(), preserve);
118  info = f;
119 }
120 
121 EST_String EST_FeatureData::type(const EST_String &feature_name)
122 {
123  EST_String t = info.S(feature_name);
124 
125  if (t.contains("<", 0)) // i.e. a predefined type
126  return t;
127 
128  return "undef";
129 }
130 
131 EST_StrList EST_FeatureData::values(const EST_String &feature_name)
132 {
133  EST_StrList v;
134  EST_String t = info.S(feature_name);
135 
136  // check for infinite set:
137  if ((t == "<FLOAT>") || (t == "<INT>") || (t == "<STRING>"))
138  return v;
139 
140  StringtoStrList(t, v);
141  return v;
142 }
143 
144 int EST_FeatureData::feature_position(const EST_String &feature_name)
145 {
146  int i;
147 
149 
150  for (i = 0, p.begin(info); p; ++p, ++i)
151  {
152 // cout << "looking at " << info.fname(p) << endl;
153 // cout << "i = " << i << endl;
154  if (p->k == feature_name)
155  return i;
156  }
157 
158  EST_error("No such feature %s\n", (const char *) feature_name);
159  return 0;
160 }
161 
162 int EST_FeatureData::update_values(const EST_String &feature_name, int max)
163 {
164  // This should be converted back to Hash tables once extra
165  // iteration functions are added the EST_Hash.
166  int i, col;
167  EST_Features values;
168  EST_String v;
169 
170 // EST_TStringHash<int> values(max);
171 
172  col = feature_position(feature_name);
173 
174  for (i = 0; i < num_samples(); ++i)
175  values.set(fd.a(i, col).string(), 1);
176 
177  // check to see if there are more types than allowed, if so
178  // just set to open set STRING
179  if (values.length() > max)
180  v = "<STRING>";
181  else
182  {
184  for(p.begin(values); p; ++p)
185  v += p->k + " ";
186  }
187 
188  info.set(feature_name, v);
189 
190  return values.length();
191 }
192 
193 EST_FeatureData & EST_FeatureData::copy(const EST_FeatureData &a)
194 {
195  (void) a;
196 /* // copy on a sub can't alter header information
197  if (!p_sub_fd)
198  {
199  delete p_info;
200  *p_info = *(a.p_info);
201  }
202  // but data can be copied so long as no resizing is involved.
203  EST_ValMatrix::operator=(a);
204 */
205  return *this;
206 }
207 
208 /*void EST_FeatureData::a(int i, int j)
209 {
210  return EST_ValMatrix::a(i, j);
211 }
212 */
213 /*
214 EST_Val &EST_FeatureData::operator()(int i, int j)
215 {
216  return a(i, j);
217 }
218 
219 EST_Val &EST_FeatureData::operator()(int s, const EST_String &f)
220 {
221  int i = info().field_index(f);
222  return a(s, i);
223 }
224 
225 EST_FeatureData &EST_FeatureData::operator=(const EST_FeatureData &f)
226 {
227  return copy(f);
228 }
229 
230 */
231 EST_Val &EST_FeatureData::a(int i, const EST_String &f)
232 {
233  (void)f;
234  return fd.a(i, 0);
235 }
236 
237 EST_Val &EST_FeatureData::a(int i, int j)
238 {
239  return fd.a(i, j);
240 }
241 const EST_Val &EST_FeatureData::a(int i, const EST_String &f) const
242 {
243  (void)f;
244  return fd.a(i, 0);
245 }
246 
247 const EST_Val &EST_FeatureData::a(int i, int j) const
248 {
249  return fd.a(i, j);
250 }
251 
252 
253 /*
254 void EST_FeatureData::sub_samples(EST_FeatureData &f, int start, int num)
255 {
256  sub_matrix(f, start, num);
257  f.p_info = p_info;
258  f.p_sub_fd = true;
259 }
260 
261 void EST_FeatureData::extract_named_fields(const EST_String &fields)
262 {
263  EST_FeatureData n;
264  // there must be a more efficient way than a copy?
265  extract_named_fields(n, fields);
266  *this = n;
267 }
268 
269 void EST_FeatureData::extract_named_fields(const EST_StrList &fields)
270 {
271  EST_FeatureData n;
272  // there must be a more efficient way than a copy?
273  extract_named_fields(n, fields);
274  *this = n;
275 }
276 
277 void EST_FeatureData::extract_numbered_fields(const EST_String &fields)
278 {
279  EST_FeatureData n;
280  // there must be a more efficient way than a copy?
281  extract_numbered_fields(n, fields);
282  *this = n;
283 }
284 
285 void EST_FeatureData::extract_numbered_fields(const EST_IList &fields)
286 {
287  EST_FeatureData n;
288  // there must be a more efficient way than a copy?
289  extract_numbered_fields(n, fields);
290  *this = n;
291 }
292 
293 
294 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
295  const EST_String &fields) const
296 {
297  EST_StrList s;
298 
299  StringtoStrList(fields, s);
300  extract_named_fields(f, s);
301 }
302 void EST_FeatureData::extract_named_fields(EST_FeatureData &f,
303  const EST_StrList &n_fields) const
304 {
305  EST_Litem *p;
306  EST_StrList n_types;
307  int i, j;
308 
309  info().extract_named_fields(*(f.p_info), n_fields);
310 
311  for (p = n_fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
312  for (j = 0; j < f.num_samples(); ++j)
313  f(j, i) = a(j, n_fields(p));
314 
315 }
316 
317 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
318  const EST_IList &fields) const
319 {
320  EST_Litem *p;
321  EST_StrList n_fields;
322  int i, j;
323 
324  for (p = fields.head(); p; p = p->next())
325  n_fields.append(info().field_name(fields(p)));
326 
327  info().extract_named_fields(*(f.p_info), n_fields);
328 
329  for (p = fields.head(), i = 0; i < f.num_fields(); ++i, p = p->next())
330  for (j = 0; j < f.num_samples(); ++j)
331  f(j, i) = a(j, fields(p));
332 
333 }
334 
335 void EST_FeatureData::extract_numbered_fields(EST_FeatureData &f,
336  const EST_String &fields) const
337 {
338  EST_StrList s;
339  EST_IList il;
340 
341  StringtoStrList(fields, s);
342  StrListtoIList(s, il);
343  extract_numbered_fields(f, il);
344 }
345 */
346 
347 EST_write_status save_est(const EST_FeatureData &f, const EST_String &filename)
348 {
349  (void)f;
350  (void)filename;
351 /*
352  ostream *outf;
353  EST_Litem *s, *e;
354  int i;
355  if (filename == "-")
356  outf = &cout;
357  else
358  outf = new ofstream(filename);
359 
360  if (!(*outf))
361  return write_fail;
362 
363  outf->precision(5);
364  outf->setf(ios::fixed, ios::floatfield);
365  outf->width(8);
366 
367  *outf << "EST_File feature_data\n"; // EST header identifier
368  *outf << "DataType ascii\n";
369  *outf << "NumSamples " << f.num_samples() << endl;
370  *outf << "NumFields " << f.num_fields() << endl;
371  *outf << "FieldNames " << f.info().field_names();
372  *outf << "FieldTypes " << f.info().field_types();
373  if (f.info().group_start.length() > 0)
374  for (s = f.info().group_start.head(), e = f.info().group_end.head();
375  s; s = s->next(), e = e->next())
376  *outf << "Group " << f.info().group_start.key(s) << " " <<
377  f.info().group_start.val(s) << " " << f.info().group_end.val(e) << endl;
378 
379  for (i = 0; i < f.num_fields(); ++i)
380  if (f.info().field_values(i).length() > 0)
381  *outf << "Field_" << i << "_Values "
382  << f.info().field_values(i) << endl;
383 
384  *outf << "EST_Header_End\n"; // EST end of header identifier
385 
386 // *outf << ((EST_ValMatrix ) f);
387  *outf << f;
388  */
389 
390  return write_ok;
391 }
392 
393 
394 EST_write_status EST_FeatureData::save(const EST_String &filename,
395  const EST_String &file_type) const
396 {
397  if ((file_type == "est") || (file_type == ""))
398  return save_est(*this, filename);
399 /* else if (file_type = "octave")
400  return save_octave(*this, filename);
401  else if (file_type = "ascii")
402  return save_ascii(*this, filename);
403 */
404 
405  cerr << "Can't save feature data in format \"" << file_type << endl;
406  return write_fail;
407 }
408 
409 
410 
411 EST_read_status EST_FeatureData::load(const EST_String &filename)
412 {
413  int i, j;
414  EST_Option hinfo;
415  EST_String k, v;
416  EST_read_status r;
417  bool ascii;
418  EST_TokenStream ts;
419  EST_EstFileType t;
420  int ns, nf;
421 
422  if (((filename == "-") ? ts.open(cin) : ts.open(filename)) != 0)
423  {
424  cerr << "Can't open track file " << filename << endl;
425  return misc_read_error;
426  }
427  // set up the character constant values for this stream
428  ts.set_SingleCharSymbols(";");
429  ts.set_quotes('"','\\');
430 
431  if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
432  {
433  cerr << "Error reading est header of file " << filename << endl;
434  return r;
435  }
436 
437  if (t != est_file_feature_data)
438  {
439  cerr << "Not a EST Feature Data file: " << filename << endl;
440  return misc_read_error;
441  }
442 
443  ns = hinfo.ival("NumSamples");
444  nf = hinfo.ival("NumFeatures");
445 
446  cout << "ns: " << ns << endl;
447  cout << "nf: " << nf << endl;
448  resize(ns, nf);
449 
450  info.clear(); // because resize will make default names
451 
452  for (i = 0; i < nf; ++i)
453  {
454  k = "Feature_" + itoString(i+1);
455  if (hinfo.present(k))
456  {
457  v = hinfo.val(k);
458  info.set(v.before(" "), v.after(" "));
459  cout << "value: " << v.after(" ") << endl;
460  }
461  else
462  EST_error("No feature definition given for feature %d\n", i);
463  }
464 
465  for (i = 0; i < ns; ++i)
466  {
468  for (p.begin(info), j = 0; j < nf; ++j, ++p)
469  {
470  if (p->k == "<FLOAT>")
471  a(i, j) = atof(ts.get().string());
472  else if (p->k == "<BOOL>")
473  a(i, j) = atoi(ts.get().string());
474  else if (p->k == "<INT>")
475  a(i, j) = atoi(ts.get().string());
476  else
477  a(i, j) = ts.get().string();
478  }
479  }
480 
481  return format_ok;
482 }
483 
484 /*ostream& operator << (ostream &st, const EST_FeatureInfo &a)
485 {
486 
487 // st << a.field_names() << endl;
488 // st << a.field_types() << endl;
489 
490  return st;
491 }
492 */
493 
494 ostream& operator << (ostream &st, const EST_FeatureData &d)
495 {
496  int i, j;
497  EST_String t;
498  EST_Val v;
499 
500 // st << a;
501 
502 // EST_ValMatrix::operator<<(st, (EST_ValMatrix)a);
503 
504  for (i = 0; i < d.num_samples(); ++i)
505  {
506  for (j = 0; j < d.num_features(); ++j)
507  {
508  v = d.a(i, j);
509  st << v << " ";
510 // cout << "field type " << a.info().field_type(j) << endl;
511 /* else if (a.info().field_type(j) == "float")
512  st << a.a(i, j);
513  else if (a.info().field_type(j) == "int")
514  st << a.a(i, j);
515 
516  else if (a.info().field_type(j) == "string")
517  {
518  // st << "\"" << a.a(i, j) << "\"";
519  t = a.a(i, j);
520  t.gsub(" ", "_");
521  st << t;
522  }
523 */
524  }
525  st << endl;
526  }
527 
528  return st;
529 }