Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
tilt_analysis_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Authors: Paul Taylor */
34 /* Date : Oct 95 */
35 /*-----------------------------------------------------------------------*/
36 /* Event RFC and Tilt labelling */
37 /* */
38 /*=======================================================================*/
39 
40 #include <cstdlib>
41 #include "EST_tilt.h"
42 #include "sigpr/EST_sigpr_utt.h"
43 #include "EST_cmd_line_options.h"
44 #include "ling_class/EST_relation_aux.h"
45 #include "EST_string_aux.h"
46 
47 #define SIL_NAMES "sil !ENTER !EXIT"
48 #define EVENT_NAMES "a rb arb m mrb"
49 
50 void set_fn_start(EST_Relation &ev);
51 void default_rfc_params(EST_Features &op);
52 void override_rfc_params(EST_Features &rfc, EST_Option &al);
53 void rfc_analysis(EST_Track &fz, EST_Relation &ev, EST_Features &op);
54 void change_label(EST_Relation &seg, const EST_StrList &oname,
55  const EST_String &nname);
56 
57 void set_options(EST_Option &al, EST_Features &op);
58 
59 void option_override(EST_Features &op, EST_Option al,
60  const EST_String &option, const EST_String &arg);
61 
62 
63 
64 /** @name <command>tilt_analysis</command> <emphasis>Produce tilt descriptions from F0 contours</emphasis>
65  * @id tilt_analysis-manual
66  * @toc
67  */
68 
69 //@{
70 
71 void extract_channels(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
72 
73 /**@name Synopsis
74  */
75 //@{
76 
77 //@synopsis
78 
79 /**
80 tilt_analysis produces a Tilt or RFC analysis of an F0 contour, given a
81 label file containing a set of approximate intonational event boundaries.
82 
83 A detailed description of the Tilt intonation model can be found in the
84 <link linkend="tilt-overview">Tilt model overview</link> section.
85 
86 */
87 
88 //@}
89 
90 /**@name OPTIONS
91  */
92 //@{
93 
94 //@options
95 
96 //@}
97 
98 
99 
100 int main(int argc, char *argv[])
101 {
102  EST_Track fz, nfz;
103  EST_Relation ev;
104  EST_Option al;
105  EST_Features op;
106  EST_StrList files, event_list, sil_list;
107  EST_String out_file, pstring;
108  EST_Track speech, raw_fz;
109  EST_Relation sil_lab;
110  EST_Features rfc_op;
111 
112  parse_command_line
113  (argc, argv,
114  EST_String("[input f0 file] -e [input event label file] -o [output file]"
115  "[options]")+
116  "Summary: produce rfc file from events and f0 contour\n"
117  "use \"-\" to make input and output files stdin/out\n"
118  "-h Options help\n\n"+
119  options_track_input()+ "\n"
120  "-event_names <string> List of labels to be classed as events. \n"
121  " Lists are specified as quoted strings with spaces \n"
122  " separating each item, e.g.: \"a b c d\"\n\n"
123  "-sil_names <string> List of labels to be classed as silence \n"
124  " Lists are specified as quoted strings with spaces \n"
125  " separating each item, e.g.: \"pau sil #\"\n\n"
126  "-e <ifile> Input event label file. This file contains \n"
127  " the list of events to be parameterized, each with its approximate \n"
128  " start and stop time marked. This file also contains silencesn \n"
129  " which are used to decide where to insert and stop phrases \n\n"
130  "-o <ofile> Output label file\n\n"
131  "-otype <string> File type of output file \n\n"
132  "-limit <float> start and stop limit in seconds. The rfc \n"
133  " matching algorithm defines a search region within which it tries \n"
134  " all possible rise and fall shapes. This option specifies how much \n"
135  " before the input label start time and how much after the input \n"
136  " label end time the search region should be. Typical value, 0.1 \n\n"
137  "-range <float> Range of RFC search region. In addition to \n"
138  " the limit, the range defines the limits of the rfc matching \n"
139  " search region as a percentage of the overal input label \n"
140  " duration. Typical value, 0.25 (the search region is the first and \n"
141  " last 25% of the label) \n\n"
142  "-smooth Smooth and Interpolate input F0 contour. \n"
143  " rfc matching can only operate on smooth fully interpolated \n"
144  " contours. This option must be used if the contour hasn't already \n"
145  " been smoothed and interpolated\n\n"
146  "-w1 <float> length in seconds of smoothing window prior\n"
147  " to interpolation. Default value 0.05 \n\n"
148  "-w2 <float> length in seconds of smoothing window after\n"
149  " to interpolation. Default value 0.05 \n\n"
150  "-sf0 <ofile> Save f0 contour that results from smoothing \n"
151  "-rfc Save as RFC parameters instead of tilt\n\n",
152  files, al);
153 
154  default_rfc_params(rfc_op);
155  override_rfc_params(rfc_op, al);
156  set_options(al, op);
157 
158  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
159 
160  if (read_track(nfz, files.first(), al) == -1)
161  exit(-1);
162  // REORG - extract proper f0 channel here
163  nfz.copy_sub_track(fz, 0, EST_ALL, 0, 1);
164 
165  if (ev.load(al.val("-e")) != format_ok)
166  exit(-1);
167 
168  pstring = (al.present("-event_names") ? al.val("-event_names"):
169  EST_String("a b ab pos"));
170  StringtoStrList(pstring, event_list);
171  convert_to_broad(ev, event_list, "int_event", 1);
172 
173  // ensure all sil_names are re-written as sil
174  pstring = (al.present("-sil_names") ? al.val("-sil_names"):
175  EST_String(SIL_NAMES));
176  StringtoStrList(pstring, sil_list);
177  change_label(ev, sil_list, "sil");
178 
179  if (al.present("-smooth"))
180  {
181  sil_lab = ev;
182  StringtoStrList("sil", sil_list);
183  convert_to_broad(sil_lab, sil_list, "pos", 0);
184  label_to_track(sil_lab, speech, fz.shift());
185  raw_fz = fz;
186  smooth_phrase(raw_fz, speech, op, fz);
187  }
188 
189  if (al.present("-sf0"))
190  fz.save(al.val("-sf0"));
191 
192  ev.f.set("name", "intevents");
193  ev.f.set("timing_style", "segment");
194 
195 // set_fn_start(ev);
196 
197  // main RFC analysis function
198  rfc_analysis(fz, ev, rfc_op);
199 
200  // convert to Tilt if necessary
201  if (!al.present("-rfc"))
202  {
203  rfc_to_tilt(ev);
204  ev.remove_item_feature("rfc");
205  }
206 
207  ev.save(out_file);
208 }
209 
210 /** @name Input Intonation Files
211 
212 A label file containing approximate intonational event boundaries must
213 be given as input. A typical file in xlabel format is shown below:
214 </para>
215 <para>
216 <screen>
217  0.290 146 sil
218  0.480 146 c
219  0.620 146 a
220  0.760 146 c
221  0.960 146 a
222  1.480 146 c
223  1.680 146 a
224  1.790 146 sil
225 </screen>
226 </para>
227 <para>
228 The set of intonational events can be given on the command line with
229 the -event_names option. The default set is "a b ab pos" and so
230 the above example would not need the -event_names option. The label
231 "c" (connection) is used to separate events, in effect giving each event a
232 start time as well as an end time. The silence labels are important
233 also: they specify where phrases should start and end.
234 */
235 
236 //@{
237 //@}
238 
239 /** @name Input F0 Files
240 
241 tilt_analysis can operate on all the F0 file types supported by the
242 EST library. Tilt analysis can only operate on smooth and continuous
243 F0 contours (i.e. F0 values must be defined during unvoiced
244 regions). If the input contour is not in this format, use the -smooth
245 option. The -w1 and -w2 options can be used to control the amount of
246 smoothing. The smoothed version of the input contour can be examined
247 by saving it using the -sf0 option.
248 
249 */
250 
251 //@{
252 //@}
253 
254 /** @name Output Intonation Files
255 
256 The output will be a label file containing the tilt parameters for the
257 events in feature format. An example, in xlabel format, is shown below:
258 </para>
259 <para>
260 <screen>
261 intonation_style tilt
262 #
263 0.29 26 phrase_start ; ev.f0 115.234 ; time 0.29 ;
264 0.53 26 a ; int_event 1 ; ev.f0 118.171 ; time 0.53 ; tilt.amp 21.8602 ;
265  tilt.dur 0.26 ; tilt.tilt -0.163727 ;
266 0.77 26 a ; int_event 1 ; ev.f0 112.694 ; time 0.77 ; tilt.amp 27.0315 ;
267  tilt.dur 0.32 ; tilt.tilt -0.446791 ;
268 1.53 26 a ; int_event 1 ; ev.f0 100.83 ; time 1.53 ; tilt.amp 7.507 ;
269  tilt.dur 0.22 ; tilt.tilt -0.296317 ;
270 1.79 26 phrase_end ; ev.f0 92.9785 ; time 1.79 ;
271 </screen>
272 </para>
273 <para>
274 The -rfc option will make a file containing the RFC parameters instead:
275 </para>
276 <para>
277 <screen>
278 intonation_style rfc
279 #
280 0.29 26 phrase_start ; ev.f0 115.234 ; time 0.29 ;
281 0.53 26 a ; ev.f0 118.171 ; rfc.rise_amp 8.19178 ; rfc.rise_dur 0.12 ;
282  rfc.fall_amp -13.6684 ; rfc.fall_dur 0.14 ; time 0.53 ;
283  0.77 26 a ; ev.f0 112.694 ; rfc.rise_amp 6.50673 ; rfc.rise_dur 0.1 ;
284  rfc.fall_amp -20.5248 ; rfc.fall_dur 0.22 ; time 0.77 ;
285 1.53 26 a ; ev.f0 100.83 ; rfc.rise_amp 1.55832 ; rfc.rise_dur 0.11 ;
286  rfc.fall_amp -6.09238 ; rfc.fall_dur 0.11 ; time 1.53 ;
287 1.79 26 phrase_end ; ev.f0 92.9785 ; time 1.79 ;
288 </screen>
289 </para>
290 <para>
291 The feature in the header, "intonation_style tilt" or
292 "intonation_style rfc" is needed for the tilt_synthesis program to
293 work.
294 
295 */
296 
297 //@{
298 //@}
299 
300 //@}
301 
302 
303 void override_rfc_params(EST_Features &rfc, EST_Option &al)
304 {
305  if (al.present("-limit"))
306  {
307  rfc.set("start_limit", al.fval("-limit"));
308  rfc.set("stop_limit", al.fval("-limit", 0));
309  }
310  if (al.present("-range"))
311  rfc.set("range", al.fval("-range"));
312  if (al.present("-min_dur"))
313  rfc.set("min_event_duration", al.fval("-min_dur"));
314 }
315 
316 void set_options(EST_Option &al, EST_Features &op)
317 {
318  // Nobody else has set window_length or second_length so
319  // set defaults here
320  op.set("window_length",0.05);
321  op.set("second_length",0.05);
322  option_override(op, al, "window_length", "-w1");
323  option_override(op, al, "second_length", "-w2");
324 }