Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
smooth_pda.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : July 1994 */
35 /*-----------------------------------------------------------------------*/
36 /* Smooth F0 contours */
37 /*=======================================================================*/
38 /*#include <cmath>
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>*/
42 //#include "sigpr/EST_pda.h"
43 #include "EST_Track.h"
44 #include "EST_Features.h"
45 #include "array_smoother.h"
46 #include "EST_math.h"
47 
// Forward declarations for the routines used by smooth_phrase() below.

// Smooths the values of track c in place, using options read from op.
48 void smooth_portion(EST_Track &c, EST_Features &op);
// Copies c into the output track and fills track breaks by interpolation;
// `fill` == 1 makes it ignore the `speech` track when deciding what to fill.
49 static void interp(const EST_Track &c, const EST_Track &speech, int fill,
50  EST_Track &interp);
// Copies smoother options found in `al` into `ms`; file-local (static).
51 static int parse_ms_list(EST_Features &al, struct Ms_Op *ms);
// Not defined in this part of the file; called here to initialise an Ms_Op
// before options are parsed (presumably provided by array_smoother -- TODO
// confirm).
52 struct Ms_Op *default_ms_op(struct Ms_Op *ms);
53 
54 void smooth_phrase(EST_Track &fz, EST_Track &speech, EST_Features &op,
55  EST_Track &smi_fz)
56 {
57  int n=0;
58  EST_Track sm_fz;
59  char nstring[10];
60 
61  if (fz.empty())
62  {
63  smi_fz = fz;
64  return;
65  }
66  sm_fz = fz;
67  sm_fz.set_channel_name("F0", 0);
68 
69  n = (int)(op.F("window_length") / fz.shift());
70  sprintf(nstring, "%d", n);
71  op.set("point_window_size", nstring);
72 
73  if (!op.present("icda_no_smooth"))
74  smooth_portion(sm_fz, op);
75 
76  if (op.present("icda_no_interp"))
77  {
78  sm_fz = fz;
79  return; // no unvoiced interpolation
80  }
81 
82  int fill = op.present("icda_fi") ? 1 : 0;
83  interp(sm_fz, speech, fill, smi_fz); // fill unvoiced region
84 
85  n = (int)(op.F("second_length") / fz.shift());
86  sprintf(nstring, "%d", n);
87  op.set("point_window_size", nstring);
88 
89  if (!op.present("icda_no_smooth"))
90  smooth_portion(smi_fz, op);
91 }
92 
93 void smooth_portion(EST_Track &c, EST_Features &op)
94 {
95  int i;
96  float *a; // need float * so it can be passed to array_smoother
97  struct Ms_Op *ms;
98  ms = new Ms_Op;
99 
100  default_ms_op(ms);
101  parse_ms_list(op, ms);
102 
103  if (op.present("point_window_size"))
104  ms->window_length = op.I("point_window_size");
105 
106  a = new float[c.num_frames()];
107 
108  for (i = 0; i < c.num_frames(); ++i)
109  a[i] = c.track_break(i) ? -1.0 : c.a(i);
110 
111  array_smoother(a, c.num_frames(), ms);
112 
113  for (i = 0; i < c.num_frames(); ++i)
114  { // occasionally NaNs result...
115  if (isnanf(a[i]))
116  {
117  c.set_break(i);
118  c.a(i) = 0.0;
119  }
120  else
121  {
122  if (a[i] < 0.0)
123  c.set_break(i);
124  else
125  c.set_value(i);
126  c.a(i) = a[i];
127  }
128  }
129 
130  delete a;
131 }
132 
133 static void interp(const EST_Track &c, const EST_Track &speech, int fill,
134  EST_Track &interp)
135 {
136  // Interpolate between unvoiced sections, and ensure breaks
137  // during silences
138  int i, n, p;
139  float m;
140  float n_val, p_val;
141  float f = c.shift();
142 
143  interp = c; // copy track
144 
145  if (speech.num_frames() < c.num_frames())
146  interp.resize(speech.num_frames(), interp.num_channels());
147 
148 
149  for (i = 1; i < interp.num_frames(); ++i)
150  {
151  if ((fill == 1) || (speech.a(i) > 0.5))
152  {
153  if (!interp.track_break(i))
154  continue; // already has a value
155 
156  p = i - 1;
157  if ((n = interp.next_non_break(i)) == 0)
158  n = interp.num_frames() - 1;
159  n_val = interp.a(n);
160  p_val = interp.a(p);
161  if (n_val <= 0) n_val = p_val;
162  if (p_val <= 0) p_val = n_val;
163  // if they are both zero, well we'll learn to live it.
164  m = (n_val - p_val) / ( interp.t(n) - interp.t(p));
165 
166  interp.a(i) = (m * f) + p_val;
167  interp.set_value(i);
168  }
169  else
170  interp.set_break(i);
171  }
172 }
173 
174 int parse_ms_list(EST_Features &al, struct Ms_Op *ms)
175 {
176  default_ms_op(ms);
177 
178  if (al.present("smooth_double"))
179  ms->smooth_double = al.I("smooth_double");
180  if (al.present( "hanning"))
181  ms->apply_hanning = al.I("hanning");
182  if (al.present("extrapolate"))
183  ms->extrapolate = al.I("extrapolate");
184  if (al.present("first_length"))
185  ms->first_median = al.I("first_length");
186  if (al.present("second_length"))
187  ms->second_median = al.I("second_length");
188  if (al.present("window_length"))
189  ms->window_length = al.I("window_length");
190 
191  return 0;
192 }