Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
pda_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : May 1994 */
35 /*-----------------------------------------------------------------------*/
36 /* Pitch Detection Algorithm Main routine */
37 /* */
38 /*=======================================================================*/
39 #include <fstream>
40 #include "EST.h"
41 #include "sigpr/EST_sigpr_utt.h"
42 #include "EST_cmd_line_options.h"
43 
44 void set_parameters(EST_Features &a_list, EST_Option &al);
45 
46 void option_override(EST_Features &op, EST_Option al,
47  const EST_String &option, const EST_String &arg);
48 
49 static int save_pm(EST_String filename, EST_Track fz);
50 
51 /** @name <command>pda</command> <emphasis>Pitch Detection Algorithm</emphasis>
52  @id pda-manual
53  * @toc
54  */
55 
56 //@{
57 
58 /**@name Synopsis
59  */
60 //@{
61 
62 //@synopsis
63 
64 /**
65 pda is a pitch detection algorithm that produces a fundamental frequency
66 contour from a speech waveform file. At present only the
67 super resolution pitch determination algorithm is implemented.
68 See (Medan, Yair, and Chazan, 1991) and (Bagshaw et al., 1993) for a detailed
69 description of the algorithm.
70 </para><para>
71 
72 The default values given below were found to optimise the performance
73 of the pitch determination algorithm for speech data sampled at 20kHz
74 using a 16\-bit waveform and low pass filter with a 600Hz cut-off
75 frequency and more than \-85dB rejection above 700Hz. The best
76 performance is obtained if the [\-P] flag is passed. </para><para>
77 */
78 
79 //@}
80 
81 /**@name Options
82  */
83 //@{
84 
85 //@options
86 
87 //@}
88 
89 
90 int main (int argc, char *argv[])
91 {
92  EST_Track fz;
93  EST_Wave sig;
94  EST_Option al;
95  EST_Features op;
96  EST_String out_file("-");
97  EST_StrList files;
98 
99  parse_command_line
100  (argc, argv,
101  EST_String("[input file] -o [output file] [options]\n")+
102  "Summary: pitch track waveform files\n"
103  "use \"-\" to make input and output files stdin/out\n"
104  "-h Options help\n\n"+
105  options_wave_input()+
106  options_pda_general()+
107  options_pda_srpd()+
108  options_track_output(),
109  files, al);
110 
111  default_pda_options(op);
112  set_parameters(op, al);
113 
114  if (read_wave(sig, files.first(), al) != format_ok)
115  exit(-1);
116 
117  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
118 
119  pda(sig, fz, op); // do f0 tracking
120 
121  if (al.present("-pm"))
122  save_pm(out_file, fz);
123  else
124  fz.save(out_file, op.S("f0_file_type", "0"));
125 
126  if (al.present("-diff"))
127  {
128  fz = differentiate(fz);
129  fz.save(out_file + ".diff", op.S("f0_file_type", "0"));
130  }
131  return 0;
132 }
133 
134 
135 void set_parameters(EST_Features &op, EST_Option &al)
136 {
137  op.set("srpd_resize", 1);
138 
139  // general options
140  option_override(op, al, "pda_frame_shift", "-shift");
141  option_override(op, al, "pda_frame_length", "-length");
142  option_override(op, al, "max_pitch", "-fmax");
143  option_override(op, al, "min_pitch", "-fmin");
144 
145  // low pass filtering options.
146  option_override(op, al, "lpf_cutoff", "-u");
147  option_override(op, al, "lpf_order", "-forder");
148 
149  option_override(op, al, "decimation", "-d");
150  option_override(op, al, "noise_floor", "-n");
151  option_override(op, al, "min_v2uv_coef_thresh", "-m");
152  option_override(op, al, "v2uv_coef_thresh_ratio", "-R");
153  option_override(op, al, "v2uv_coef_thresh", "-H");
154  option_override(op, al, "anti_doubling_thresh", "-t");
155  option_override(op, al, "peak_tracking", "-P");
156 
157  option_override(op, al, "f0_file_type", "-otype");
158  option_override(op, al, "wave_file_type", "-itype");
159 
160  if (al.val("-L", 0) == "true")
161  op.set("do_low_pass", "true");
162  if (al.val("-R", 0) == "true")
163  op.set("do_low_pass", "false");
164 
165 
166 /* op.set("lpf_cutoff",al.val("-u", 0));
167  op.set("lpf_order",al.val("-forder", 0));
168 
169  //sprd options
170  op.set("decimation", al.val("-d", 0));
171  op.set("noise_floor", al.val("-n", 0));
172  op.set("min_v2uv_coef_thresh", al.val("-m", 0));
173  op.set("v2uv_coef_thresh_ratio", al.val("-r", 0));
174  op.set("v2uv_coef_thresh", al.val("-H", 0));
175  op.set("anti_doubling_thresh", al.val("-t", 0));
176  op.set("peak_tracking", al.val("-P", 0));
177  if (al.val("-L", 0) == "true")
178  op.set("do_low_pass", "true");
179  if (al.val("-R", 0) == "true")
180  op.set("do_low_pass", "false");
181  op.set("f0_file_type", al.val("-otype", 0));
182  op.set("wave_file_type", al.val("-itype", 0));
183 */
184 }
185 
186 /* a_list.override_val("sample_rate", al.val("-f", 0));
187  a_list.override_val("min_pitch", al.val("-fmin", 0));
188  a_list.override_val("max_pitch", al.val("-fmax", 0));
189  a_list.override_val("pda_frame_shift", al.val("-s", 0));
190  a_list.override_val("pda_frame_length",al.val("-l", 0));
191 
192  // low pass filtering options.
193  a_list.override_val("lpf_cutoff",al.val("-u", 0));
194  a_list.override_val("lpf_order",al.val("-forder", 0));
195 
196  //sprd options
197  a_list.override_val("decimation", al.val("-d", 0));
198  a_list.override_val("noise_floor", al.val("-n", 0));
199  a_list.override_val("min_v2uv_coef_thresh", al.val("-m", 0));
200  a_list.override_val("v2uv_coef_thresh_ratio", al.val("-r", 0));
201  a_list.override_val("v2uv_coef_thresh", al.val("-H", 0));
202  a_list.override_val("anti_doubling_thresh", al.val("-t", 0));
203  a_list.override_val("peak_tracking", al.val("-P", 0));
204  if (al.val("-L", 0) == "true")
205  a_list.override_val("do_low_pass", "true");
206  if (al.val("-R", 0) == "true")
207  a_list.override_val("do_low_pass", "false");
208  a_list.override_val("f0_file_type", al.val("-otype", 0));
209  a_list.override_val("wave_file_type", al.val("-itype", 0));
210 */
211 
212 
213 static int save_pm(EST_String filename, EST_Track fz)
214 {
215  ostream *outf;
216  float position, period;
217 
218  if (filename == "-")
219  outf = &cout;
220  else
221  outf = new ofstream(filename);
222 
223  if (!(*outf))
224  {
225  cerr << "save_pm: can't write to file \"" << filename << "\"" << endl;
226  return -1;
227  }
228 
229  *outf << "XAO1\n\n"; // xmg header identifier.
230  *outf << "LineType bars \n";
231  *outf << "LineStyle solid \n";
232  *outf << "LineWidth 0 \n";
233  *outf << "Freq 16\n";
234  *outf << "Format Binary \n";
235  *outf << char(12) << "\n"; // control L character
236 
237  position = 0.0;
238  int gap = 0;
239  for (int i = 0; i < fz.num_frames(); ++i)
240  {
241  if (fz.val(i))
242  {
243  if (gap)
244  {
245  position = fz.t(i);
246  gap = 0;
247  }
248  period = 1.0 / fz.a(i);
249  *outf << (position + period) * 1000.0 << endl;
250  position += period;
251  }
252  else
253  gap = 1;
254  }
255 
256  if (outf != &cout)
257  delete outf;
258 
259  return 0;
260 }
261 
262 /**@name Examples
263 
264 Pitch detection on typical male voice, using low pass filtering:
265 <screen>
266 $ pda kdt_010.wav -o kdt_010.f0 -fmin 80 -fmax 200 -L
267 </screen>
268 */
269 //@{
270 
271 //@}
272 //@}