Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
pitchmark.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author: Paul Taylor */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Pitchmark Laryngograph Signals */
37 /* */
38 /*=======================================================================*/
39 
40 /* Note - this is based on a pitchmarker developed by Mike Macon and
41 written in matlab.
42 */
43 
44 #include "stdlib.h"
45 #include "sigpr/EST_filter.h"
46 #include "sigpr/EST_pitchmark.h"
47 #include "ling_class/EST_Relation.h"
48 #include "EST_math.h"
49 #include "EST_inline_utils.h"
50 #include "EST_wave_aux.h"
51 #include "EST_track_aux.h"
52 
53 
54 void delta(EST_Wave &tr, EST_Wave &d, int regression_length);
55 
56 EST_Track pitchmark(EST_Wave &lx, int lx_lf, int lx_lo, int lx_hf,
57  int lx_ho, int df_lf, int df_lo, int mo, int debug)
58 {
59  EST_Track pm;
60  EST_Wave lxdiff;
61 
62  pm.set_equal_space(false);
63  // pre-filtering
64 
65  if (debug)
66  cout << "pitchmark 1\n";
67 
68  FIRlowpass_double_filter(lx, lx_lf, lx_lo);
69  FIRhighpass_double_filter(lx, lx_hf, lx_ho);
70 
71  if (debug)
72  cout << "pitchmark 2\n";
73 
74  if (debug)
75  lx.save("tmpfilt.lx");
76 
77 // cout << "df " << df_lf << " df_o " << df_lo << endl;
78 
79 // lxdiff = lx;
80 // differentiate(lxdiff);
81  lxdiff.resize(lx.num_samples());
82  lxdiff.set_sample_rate(lx.sample_rate());
83  delta(lx, lxdiff, 4);
84 
85  if (debug)
86  lxdiff.save("tmpdiff.lx");
87 
88  // it was found that median smoothing worked better here.
89 
90  if (df_lo > 0)
91  FIRlowpass_double_filter(lxdiff, df_lf, df_lo);
92 
93  if (mo > 0)
94  simple_mean_smooth(lxdiff, mo);
95 
96  if (debug)
97  lxdiff.save("tmpfiltdiff.lx");
98 
99  neg_zero_cross_pick(lxdiff, pm);
100 
101  return pm;
102 }
103 
104 EST_Track pitchmark(EST_Wave &lx, EST_Features &op)
105 {
106  EST_Track pm;
107  EST_Wave lxdiff;
108  int lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo, mo, debug;
109 
110  lx_lf = op.present("lx_low_frequency") ?
111  op.I("lx_low_frequency") : 400;
112  lx_lo = op.present("lx_low_order") ?
113  op.I("lx_low_order") : 19;
114 
115  lx_hf = op.present("lx_high_frequency") ?
116  op.I("lx_high_frequency") : 40;
117  lx_ho = op.present("lx_high_order") ?
118  op.I("lx_high_order") : 19;
119 
120  df_lf = op.present("df_low_frequency") ?
121  op.I("df_low_frequency") : 1000;
122  df_lo = op.present("df_low_order") ?
123  op.I("df_low_order") : 0;
124 
125  mo = op.present("median_order") ?
126  op.I("median_order") : 19;
127 
128  debug = op.present("pm_debug") ? 1 : 0;
129 
130  return pitchmark(lx, lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo,
131  mo, debug);
132 }
133 
134 /** Iterate through track and eliminate any frame whose distance to a
135 preceding frames is less than min seconds*/
136 
137 void pm_min_check(EST_Track &pm, float min)
138 {
139  int i, j;
140 
141  for (i = j = 0; i < pm.num_frames() - 1; ++i, ++j)
142  {
143  pm.t(j) = pm.t(i);
144  while ((i < (pm.num_frames() - 1)) && ((pm.t(i + 1) - pm.t(i)) < min))
145  ++i;
146  }
147  if (i < pm.num_frames())
148  pm.t(j) = pm.t(i);
149  pm.resize(j, pm.num_channels());
150 }
151 
152 
153 void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)
154 {
155  EST_FVector new_pm;
156 
157  if (new_end < 0)
158  new_end = pm.end();
159 
160 // if (debug)
161  // cout<< "new end:" << new_end << endl;
162  // largest possible set of new pitchmarks
163 
164 // cout << "num frames:" << pm.num_frames() << endl;
165 // cout << "num frames:" << pm.end() << endl;
166 // cout << "num frames:" << min << endl;
167  new_pm.resize(int(new_end / min));
168 // cout << "num frames:" << pm.end()/min << endl;
169 // cout << "num frames:" << new_pm.n() << endl;
170 
171  int i, j, npm=0;
172  float last = 0.0;
173 
174  int dropped=0, added=0;
175 
176  for(j = 0; j < pm.num_frames(); j++)
177  {
178  float current = pm.t(j);
179 
180  if (current > new_end)
181  break;
182 
183  if (current - last < min)
184  {
185  // drop current pitchmark
186  dropped++;
187  }
188 
189  else if (current-last > max)
190  {
191  // interpolate
192  int num = ifloor((current - last)/ def);
193  float size = (current-last) / num;
194  for (i = 1; i <= num; i++)
195  {
196  new_pm[npm] = last + i * size;
197  npm++;
198  added++;
199  }
200  }
201  else
202  {
203  new_pm[npm] = pm.t(j);
204  npm++;
205  }
206  last=current;
207  }
208 
209  if (new_end - last > max)
210  {
211  // interpolate
212  int num = ifloor((new_end - last)/ def);
213  float size = (new_end -last) / num;
214  for (i = 1; i <= num; i++)
215  {
216  new_pm[npm] = last + i * size;
217  npm++;
218  added++;
219  }
220  }
221 
222 // if (debug)
223 // if (dropped>0 || added >0)
224 // cout << "Dropped " << dropped<< " and added " << added << " PMs\n";
225 
226 // if (debug)
227  pm.resize(npm, pm.num_channels());
228  for (i = 0; i < npm; i++)
229  pm.t(i) = new_pm(i);
230 }
231 
232 void neg_zero_cross_pick(EST_Wave &lx, EST_Track &pm)
233 {
234  int i, j;
235  pm.resize(lx.num_samples(), EST_CURRENT);
236 
237  for (i = 1, j = 0; i < lx.num_samples(); ++i)
238  if ((lx.a(i -1) > 0) && (lx.a(i) <= 0))
239  pm.t(j++) = lx.t(i);
240 
241  pm.resize(j, EST_CURRENT);
242 
243  for (i = 0; i < pm.num_frames(); ++i)
244  pm.set_value(i);
245 }
246 
247 void pm_to_label(EST_Track &pm, EST_Relation &lab)
248 {
249  EST_Item *seg;
250  lab.clear();
251 
252  for (int i = 0; i < pm.num_frames(); ++i)
253  {
254  seg = lab.append();
255  seg->set("name","");
256  seg->set("end",pm.t(i));
257  }
258 }
259 
260 void pm_to_f0(EST_Track &pm, EST_Track &f0)
261 {
262  float prev_pm = 0.0;
263  f0 = pm;
264  f0.resize(EST_ALL, 1);
265 
266  for (int i = 0; i < f0.num_frames(); ++i)
267  {
268  f0.a(i, 0) = 1.0 / (f0.t(i) - prev_pm);
269  prev_pm = f0.t(i);
270  }
271 }
272 
273 void pm_to_f0(EST_Track &pm, EST_Track &fz, float shift)
274 {
275  int i;
276  float period;
277 
278  fz.resize((int)(pm.end()/shift), 1);
279  fz.fill_time(shift);
280 
281  for (i = 0; i < fz.num_frames() -1 ; ++i)
282  {
283  period = get_time_frame_size(pm, pm.index_below(fz.t(i)));
284  fz.a(i) = 1.0 /period;
285  }
286 }