Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
sigpr_utt.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Authors: Paul Taylor and Simon King */
34 /* Date : March 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Signal processing functions which operate on entire utterances */
37 /* */
38 /*=======================================================================*/
39 
40 
41 #include "EST_error.h"
42 #include "EST_track_aux.h"
43 #include "EST_inline_utils.h"
44 #include "sigpr/EST_fft.h"
45 #include "sigpr/EST_sigpr_frame.h"
46 #include "sigpr/EST_sigpr_utt.h"
47 
48 #include "EST_Features.h"
49 #include "EST_types.h"
50 #include "EST_string_aux.h"
51 
52 void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
53  const EST_StrList &slist);
54 
55 void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
56  const EST_StrList &slist);
57 
58 
59 
60 static void parse_op_settings(EST_Features &op, EST_WindowFunc *&wf, float &f)
61 {
62  EST_String w_name;
63 
64  if (op.present("window_type"))
65  w_name = op.S("window_type");
66  else
67  w_name = DEFAULT_WINDOW_NAME;
68  wf = EST_Window::creator(w_name);
69 
70  f = op.present("frame_factor") ? op.F("frame_factor")
71  : DEFAULT_FRAME_FACTOR;
72 }
73 
74 void add_channels_to_map(EST_StrList &map, EST_StrList &types,
75  EST_Features &op, int delta_order)
76 {
77  EST_String t;
78  EST_String dos;
79 
80  if (delta_order == 0)
81  dos = "";
82  else if (delta_order == 1)
83  dos = "_d";
84  else if (delta_order == 2)
85  dos = "_a";
86  else
87  EST_error("Requested delta order too high: %d\n", delta_order);
88 
89 
90 
91  for (EST_Litem *s = types.head(); s; s = s->next())
92  {
93  t = types(s);
94  if (op.present(t + "_order"))
95  {
96  int actual_order = op.I(t + "_order");
97  if(actual_order < 1)
98  {
99  cerr << "Invalid " << t << "_order" << " : ";
100  cerr << actual_order;
101  cerr << " (using 1 instead) " << endl;
102  actual_order = 1;
103  }
104 
105  int lowest_coef=0,highest_coef=actual_order-1;
106 
107  if(t == "lpc")
108  // For lpc coefficients, we ALWAYS include energy as the
109  // 0th coefficient, so when the users gives lpc_order of
110  // 16, we produce 17 coefficients (0 to 16)
111  highest_coef=actual_order;
112 
113 
114  if(t == "melcep")
115  {
116  // Mel cepstra have special names - if we are not
117  // including c0, then the coefficients are numbered
118  // 1...order, and NOT 0...order-1
119  highest_coef=actual_order;
120  if(op.present("include_c0"))
121  lowest_coef = 0;
122  else
123  lowest_coef = 1;
124  }
125 
126  if(actual_order == 1)
127  map.append(t + dos);
128  else
129  map.append("$" + t + dos + "-"+itoString(lowest_coef)+"+"+itoString(highest_coef));
130  }
131  else
132  map.append(t + dos);
133  }
134 }
135 
136 void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op,
137  const EST_StrList &slist)
138 {
139  EST_Track fill, tmp;
140  EST_String b_name;
141  EST_String k;
142  float frame_factor;
143  EST_WindowFunc *wf;
144 
145  int fbank_order;
146  float liftering_parameter=0;
147  bool use_power_rather_than_energy=false, take_logs=true, include_c0=false;
148 
149  parse_op_settings(op, wf, frame_factor);
150 
151  for (EST_Litem *s = slist.head(); s; s = s->next())
152  {
153  k = slist(s);
154 
155  EST_String start_channel="0";
156  if( (slist(s) == "melcep") && !op.present("include_c0"))
157  start_channel = "1";
158 
159  if (fv.has_channel(k))
160  fv.sub_track(fill, 0, EST_ALL, k , 1);
161  else
162  fv.sub_track(fill, 0, EST_ALL, k + "_" + start_channel, k + "_N");
163 
164  if(op.present("usepower"))
165  cerr << "USING POWER" << endl;
166 
167  if ((slist(s) == "lpc") || (slist(s) == "cep")
168  ||(slist(s) == "ref") || (slist(s) == "lsf"))
169  sig2coef(sig, fill, slist(s), frame_factor, wf);
170  else if (slist(s) == "power")
171  power(sig, fill, frame_factor);
172  else if (slist(s) == "energy")
173  energy(sig, fill, frame_factor);
174  else if (slist(s) == "f0")
175  {
176  op.set("srpd_resize", 0);
177  op.set("pda_frame_shift", op.F("frame_shift"));
178  pda(sig, fill, op, "srpd");
179  }
180 // else if (slist(s) == "rasta")
181 // rasta(sig, fill, op);
182 
183  else if (slist(s) == "fbank")
184  {
185  use_power_rather_than_energy = op.present("usepower");
186  fbank(sig, fill, frame_factor, wf, use_power_rather_than_energy,
187  take_logs);
188  }
189 
190  else if (slist(s) == "melcep")
191  {
192  fbank_order=op.I("fbank_order");
193  use_power_rather_than_energy = op.present("usepower");
194  include_c0=op.present("include_c0");
195 
196  if(op.present("lifter"))
197  liftering_parameter=op.F("lifter");
198 
199  //cerr << "calling melcep " << fill.num_channels() << endl;
200 
201  melcep(sig, fill, frame_factor, fbank_order,
202  liftering_parameter, wf, include_c0,
203  use_power_rather_than_energy);
204  }
205  else
206  EST_error("Error: Unnknown type of processing requested: %s\n",
207  ((const char*) slist(s)));
208  }
209 }
210 
211 void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
212  const EST_String &k)
213 {
214  EST_Track base, fill;
215 
216 // cout << "type: " << k << endl;
217 
218  // look to see if base coefficients already exist
219  EST_String start_channel="0";
220  if( (k == "melcep") && !op.present("include_c0"))
221  start_channel = "1";
222 
223  if (fv.has_channel(k))
224  fv.sub_track(base, 0, EST_ALL, k , 1);
225  else if (fv.has_channel(k + "_" + start_channel))
226  fv.sub_track(base, 0, EST_ALL, k + "_" + start_channel, k + "_N");
227  else // otherwise make them in temporary track
228  {
229 // cout << "making tmp cpoefs\n";
230  EST_StrList tmp_base, tmp_map;
231  tmp_base.append(k);
232  add_channels_to_map(tmp_map, tmp_base, op, 0);
233  base.resize(fv.num_frames(), tmp_map);
234 
235  base.fill_time(fv);
236 
237  base.set_equal_space(false);
238  sigpr_base(sig, base, op, tmp_base);
239 // cout << "BASE\n" << base;
240 // cout <<"after\n";
241  }
242 
243  if (fv.has_channel(k + "_d"))
244  fv.sub_track(fill, 0, EST_ALL, k+"_d", 1);
245  else
246  fv.sub_track(fill, 0, EST_ALL, k+"_d_" + start_channel, k+"_d_N");
247 
248 /* cout << "base\n";
249  track_info(base);
250  cout << "fill\n";
251  track_info(fill);
252 */
253 
254  delta(base, fill);
255 }
256 
257 void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
258  const EST_String &k)
259 {
260  EST_Track base, fill;
261 
262 // cout << endl << endl << "acc\n";
263 
264 // cout << "type: " << k << endl;
265 
266  // look to see if delta coefficients already exist
267  EST_String start_channel="0";
268  if( (k == "melcep") && !op.present("include_c0"))
269  start_channel = "1";
270  if (fv.has_channel(k+"_d"))
271  fv.sub_track(base, 0, EST_ALL, k + "_d", 1);
272  else if (fv.has_channel(k + "_d_" + start_channel))
273  fv.sub_track(base, 0, EST_ALL, k + "_d_" + start_channel, k + "_d_N");
274  else // otherwise make them in temporary track
275  {
276  EST_StrList tmp_base, tmp_map;
277  tmp_base.append(k);
278  add_channels_to_map(tmp_map, tmp_base, op, 1);
279  base.resize(fv.num_frames(), tmp_map);
280 
281  base.fill_time(fv);
282 
283  base.set_equal_space(false);
284  sigpr_delta(sig, base, op, tmp_base);
285  }
286 
287  if (fv.has_channel(k + "_a"))
288  fv.sub_track(fill, 0, EST_ALL, k+"_a", 1);
289  else
290  fv.sub_track(fill, 0, EST_ALL, k+"_a_" + start_channel, k+"_a_N");
291 
292 // cout << "base\n";
293 // track_info(base);
294 // cout << "fill\n";
295 // track_info(fill);
296 
297  delta(base, fill);
298 }
299 
300 void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op,
301  const EST_StrList &slist)
302 {
303  for (EST_Litem *s = slist.head(); s; s = s->next())
304  sigpr_acc(sig, fv, op, slist(s));
305 }
306 
307 void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op,
308  const EST_StrList &slist)
309 {
310  for (EST_Litem *s = slist.head(); s; s = s->next())
311  sigpr_delta(sig, fv, op, slist(s));
312 }
313 
314 
315 int get_frame_size(EST_Track &pms,
316  int i, int sample_rate, int prefer_prev)
317 {
318  int prev = -1;
319  int next = -1;
320 
321  if (i>0)
322  prev = irint((pms.t(i) - pms.t(i-1))*sample_rate);
323  if (i<pms.num_frames()-1)
324  next = irint((pms.t(i+1) - pms.t(i))*sample_rate);
325 
326  if (prefer_prev)
327  return prev>=0?prev:(next>=0?next:0);
328  return next>=0?next:(prev>=0?prev:0);
329 }
330 
331 float get_time_frame_size(EST_Track &pms, int i, int prefer_prev)
332 {
333  float prev = -1;
334  float next = -1;
335 
336  if (i > 0)
337  prev = pms.t(i) - pms.t(i-1);
338  if (i < pms.num_frames() -1)
339  next = pms.t(i+1) - pms.t(i);
340 
341  if (prefer_prev)
342  return prev>=0 ? prev: (next>=0 ? next : 0.0);
343  return next>=0 ? next: (prev>=0 ? prev : 0.0);
344 }
345 
346 /*void sig2lpc(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf, float factor)
347 {
348  int order = lpc.num_channels() - 1;
349  EST_FVector coefs(order + 1);
350  int k;
351  int window_start, window_size, length; // can be merged with window_size
352 
353  int sample_rate = sig.sample_rate();
354 
355  EST_FVector frame;
356 
357  for (k = 0; k < lpc.num_frames(); ++k)
358  {
359  int pos = irint(lpc.t(k) * sample_rate);
360 
361  length = get_local_frame_size(lpc, k, sig.sample_rate());
362  window_size = irint(length * factor);
363  window_start = pos - (window_size/2);
364 
365  EST_Window::window_signal(sig, wf, window_start,
366  window_size, frame, 1);
367 
368  lpc.frame(coefs, k);
369  sig2lpc(frame, coefs);
370  }
371  lpc.save("test.est", "est");
372 }
373 */
374 
375 /*typedef void EST_FrameFunc(const EST_FVector &in_frame,
376  EST_FVector &out_frame);
377 
378 void sig2coef(EST_Wave &sig, EST_Track &lpc, EST_WindowFunc *wf,
379  EST_FrameFunc *ff, float factor)
380 {
381  EST_FVector coefs, frame;
382  int start, size;
383 
384  for (int k = 0; k < lpc.num_frames(); ++k)
385  {
386  size = irint(get_local_frame_size(lpc, k, sig.sample_rate())* factor);
387  start = (irint(lpc.t(k) * sig.sample_rate()) - (size/2));
388 
389  EST_Window::window_signal(sig, wf, start, size, frame, 1);
390 
391  lpc.frame(coefs, k);
392  (*ff)(frame, coefs);
393  }
394 }
395 */
396 
397 void sig2coef(EST_Wave &sig, EST_Track &tr, EST_String type,
398  float factor, EST_WindowFunc *wf)
399 {
400  EST_FVector coefs, frame;
401  int start, size;
402 
403 // cout << "TYPE IS " << type << endl;
404 
405  for (int k = 0; k < tr.num_frames(); ++k)
406  {
407  if (factor < 0) // want fixed frame rate
408  size = (int)(-1.0 * factor * (float)sig.sample_rate());
409  else
410  size = irint(get_frame_size(tr, k, sig.sample_rate())* factor);
411  start = (irint(tr.t(k) * sig.sample_rate()) - (size/2));
412 
413  EST_Window::window_signal(sig, wf, start, size, frame, 1);
414 
415  tr.frame(coefs, k);
416  frame_convert(frame, "sig", coefs, type);
417  }
418 }
419 
420 void power(EST_Wave &sig, EST_Track &pow, float factor)
421 {
422  EST_FVector frame;
423  int window_start, window_size, pos, k;
424 
425  EST_WindowFunc *wf = EST_Window::creator("rectangular");
426 
427  for (k = 0; k < pow.num_frames(); ++k)
428  {
429  pos = irint(pow.t(k) * sig.sample_rate());
430  if (factor < 0) // want fixed frame rate
431  window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
432  else
433  window_size = irint(get_frame_size(pow, k, sig.sample_rate())
434  * factor);
435  window_start = pos - window_size/2;
436  EST_Window::window_signal(sig, wf, window_start, window_size,frame, 1);
437 
438  sig2pow(frame, pow.a(k));
439  }
440 }
441 
442 void energy(EST_Wave &sig, EST_Track &pow, float factor)
443 {
444  EST_FVector frame;
445  int window_start, window_size, pos, k;
446 
447  EST_WindowFunc *wf = EST_Window::creator("rectangular");
448 
449  for (k = 0; k < pow.num_frames(); ++k)
450  {
451  pos = irint(pow.t(k) * sig.sample_rate());
452  if (factor < 0) // want fixed frame rate
453  window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
454  else
455  window_size = irint(get_frame_size(pow, k, sig.sample_rate())
456  * factor);
457  window_start = pos - window_size/2;
458  EST_Window::window_signal(sig, wf, window_start, window_size,frame,1);
459 
460  sig2rms(frame, pow.a(k));
461  }
462 }
463 
464 static EST_String determine_type(const EST_String &intype)
465 {
466  return (intype.contains("_") ? intype.before("_"): intype);
467 }
468 
469 void convert_track(EST_Track &in_track, EST_Track &out_track,
470  const EST_String &out_type, const EST_String &in_type)
471 {
472  if (in_track.num_frames() != out_track.num_frames())
473  EST_error("In track has %d frames, out track has %d\n",
474  in_track.num_frames(), out_track.num_frames());
475 
476  EST_String tmp;
477  tmp = ((in_type == "") ? determine_type(in_track.channel_name(0)):in_type);
478 
479  EST_FVector in_frame(in_track.num_channels());
480  EST_FVector out_frame(out_track.num_channels());
481 
482  for (int i = 0; i < in_track.num_frames(); ++i)
483  {
484  in_track.frame(in_frame, i);
485  out_track.frame(out_frame, i);
486  frame_convert(in_frame, tmp, out_frame, out_type);
487  }
488 }
489 
490 
491 
492 void fbank(EST_Wave &sig,
493  EST_Track &fbank_track,
494  const float factor,
495  EST_WindowFunc *wf,
496  const bool use_power_rather_than_energy,
497  const bool take_log)
498 {
499 
500  // still to add : high/low pass filtering
501 
502  int window_start, window_size, pos, k;
503  EST_FVector frame,fbank_frame;
504 
505  // get_order(...) gives wrong answer ... Paul ?
506  int fbank_order = fbank_track.num_channels();
507 
508  // sanity check
509  if(fbank_order < 1)
510  {
511  EST_error("Filterbank order of %i makes no sense.\n",fbank_order);
512  return;
513  }
514 
515  for (k = 0; k < fbank_track.num_frames(); ++k)
516  {
517  if (factor < 0) // want fixed frame rate
518  window_size = (int)(-1.0 * factor * (float)sig.sample_rate());
519  else
520  window_size = irint(get_frame_size(fbank_track, k, sig.sample_rate())
521  * factor);
522  pos = irint(fbank_track.t(k) * sig.sample_rate());
523  window_start = pos - window_size/2;
524  EST_Window::window_signal(sig, wf, window_start, window_size,frame, 1);
525 
526  fbank_track.frame(fbank_frame,k);
527  sig2fbank(frame,fbank_frame,sig.sample_rate(),
528  use_power_rather_than_energy,take_log);
529 
530  }
531 
532 
533 }
534 
535 
536 void melcep(EST_Wave &sig, EST_Track &mfcc_track,
537  float factor,
538  int fbank_order,
539  float liftering_parameter,
540  EST_WindowFunc *wf,
541  const bool include_c0,
542  const bool use_power_rather_than_energy)
543 {
544 
545  EST_FVector frame,mfcc_frame,fbank_frame;
546  int k;
547 
548  // first, do filterbank analysis
549  // need a temporary track, with the same setup as mfcc_track
550  EST_Track fbank_track;
551 
552 // cout << "MELPCEP\n" << fbank_order << endl;
553 
554  fbank_track.resize(mfcc_track.num_frames(), fbank_order);
555  fbank_track.fill_time(mfcc_track);
556  fbank_track.set_equal_space(false);
557 
558  // temp removed by pault 24/02/99
559 // make_timed_track(mfcc_track, fbank_track, "filter", fbank_order, 0);
560 
561  // 'true' makes fbank(...) take logs
562  fbank(sig, fbank_track, factor, wf, use_power_rather_than_energy, true);
563 
564  /*
565  if(include_c0)
566  cerr << "melcep c0" << endl;
567  else
568  cerr << "melcep no c0" << endl;
569  */
570  for (k = 0; k < mfcc_track.num_frames(); ++k)
571  {
572 
573  mfcc_track.frame(mfcc_frame,k);
574  fbank_track.frame(fbank_frame,k);
575 
576  fbank2melcep(fbank_frame, mfcc_frame,liftering_parameter,include_c0);
577  }
578 }