Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
ch_track_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1994,1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : June 1994 */
35 /*-----------------------------------------------------------------------*/
36 /* EST_Track file manipulation program */
37 /* */
38 /*=======================================================================*/
39 
40 #include "EST.h"
41 #include "EST_cmd_line_options.h"
42 
43 #define DEFAULT_TIME_SCALE 0.001
44 
45 int StrListtoIList(EST_StrList &s, EST_IList &il);
46 void extract_channel(EST_Track &orig, EST_Track &nt, EST_IList &ch_list);
47 
48 EST_write_status save_snns_pat(const EST_String filename,
49  EST_TrackList &inpat, EST_TrackList &outpat);
50 
51 EST_read_status read_TrackList(EST_TrackList &tlist, EST_StrList &files,
52  EST_Option &al);
53 
54 void extract(EST_Track &tr, EST_Option &al);
55 /** @name <command>ch_track</command> <emphasis>Track file manipulation</emphasis>
56  * @id ch-track-manual
57  * @toc
58  */
59 
60 //@{
61 
62 
63 /**@name Synopsis
64  */
65 //@{
66 
67 //@synopsis
68 
69 /**
70 ch_track is used to manipulate the format of a track
71 file. Operations include:
72 
73 <itemizedlist>
74 <listitem><para>file format conversion</para></listitem>
75 <listitem><para>smoothing</para></listitem>
76 <listitem><para>changing the frame spacing of a track (resampling)</para></listitem>
77 <listitem><para>producing differentiated and delta tracks</para></listitem>
78 <listitem><para>Using a threshold to convert a track file to a label file</para></listitem>
79 
80 <listitem><para>making multiple input files into a single multi-channel output file</para></listitem>
81 <listitem><para>extracting a single channel from a multi-channel track</para></listitem>
82 <listitem><para>extracting a time-delimited portion of the track</para></listitem>
83 </itemizedlist>
84 
85  */
86 
87 //@}
88 
89 /**@name Options
90  */
91 //@{
92 
93 //@options
94 
95 //@}
96 
97 
98 int main(int argc, char *argv[])
99 {
100  EST_String in_file("-"), out_file("-");
101  EST_Option al, settings;
102  EST_String fname, ftmp;
103  EST_StrList files;
104  EST_Track tr;
105  EST_TrackList trlist;
106  EST_Litem *p;
107 
108  parse_command_line(
109  argc, argv,
110  EST_String("[input file] -o [output file] [options]\n")+
111  "Summary: change/copy track files\n"
112  "use \"-\" to make input and output files stdin/out\n"
113  "-h Options help\n"+
114  options_track_input()+ "\n"+
115  options_track_output()+ "\n"
116  "-info Print information about file and header. \n"
117  " This option gives useful information such as file \n"
118  " length, file type, channel names. No output is produced\n\n"
119  "-track_names <string> \n"
120  " File containing new names for output channels\n\n"
121  "-diff Differentiate contour. This performs simple \n"
122  " numerical differentiation on the contour by \n"
123  " subtracting the amplitude of the current frame \n"
124  " from the amplitude of the next. Although quick, \n"
125  " this technique is crude and not recommende as the \n"
126  " estimation of the derivate is done on only one point\n\n"
127  "-delta <int> Make delta coefficients (better form of differentiate).\n"
128  " The argument to this option is the regression length of \n"
129  " of the delta calculation and can be between 2 and 4 \n\n"
130  "-sm <float> Length of smoothing window in seconds. Various types of \n"
131  " smoothing are available for tracks. This options specifies \n"
132  " length of the smooting window which effects the degree of \n"
133  " smoothing, i.e. a longer value means more smoothing \n\n"
134  "-smtype <string> Smooth type, median or mean\n"
135  "-style <string> Convert track to other form. Currently only one form \n"
136  " \"label\" is supported. This uses a specified cut off to \n"
137  " make a label file, with two labels, one for above the \n"
138  " cut off (-pos) and one for below (-neg)\n\n"
139  "-t <float> threshold for track to label conversion \n"
140  "-neg <string> Name of negative label in track to label conversion \n"
141  "-pos <string> Name of positive label in track to label conversion \n"
142  "-pc <string> Combine given tracks in parallel. If option \n"
143  " is longest, pad shorter tracks to longest, else if \n"
144  " first pad/cut to match first input track \n" +
145  options_track_filetypes_long(),
146  files, al);
147 
148 /*redundant options
149  "-time_channel <string>\n"+
150  " Which track in track file holds pitchmark times\n"+
151  "-time_scale <float> \n"+
152  " Scale of pitchmarks (default 0.001 = milliseconds)\n"+
153 */
154 
155 
156  override_lib_ops(settings, al);
157  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
158 
159  EST_TokenStream ts;
160 
161 // ts.open(files.first());
162 // tr.load(ts);
163 // cout << tr;
164 
165  if (read_TrackList(trlist, files, al) != read_ok)
166  exit(0);
167 
168  if (files.length() == 0)
169  {
170  cerr << argv[0] << ": no input files specified\n";
171  exit(-1);
172  }
173 
174  if (al.present("-info"))
175  {
176  for (p = trlist.head(); p; p = p->next())
177  track_info(trlist(p));
178  exit(0);
179  }
180 
181  if (al.present("-pc")) // parallelize them
182  ParallelTracks(tr, trlist, al.val("-pc"));
183 
184  else if (al.val("-otype", 0) == "snns")
185  { // sometime this will generalise for multiple input files
186  EST_TrackList inpat, outpat;
187  inpat.append(trlist.nth(0));
188  outpat.append(trlist.nth(1));
189  save_snns_pat(out_file, inpat, outpat);
190  exit(0);
191  }
192  else // concatenate them
193  {
194  tr.resize(0, tr.num_channels());
195  // Reorg -- fix += to resize to largest num_channels (with warning)
196  for (p = trlist.head(); p; p = p->next())
197  tr += trlist(p);
198  }
199 
200  if (al.present("-S"))
201  tr.sample(al.fval("-S"));
202  if (al.present("-sm"))
203  {
204  track_smooth(tr, al.fval("-sm"),al.val("-smtype"));
205  }
206 
207  if (al.present("-diff") && al.present("-delta"))
208  {
209  cerr << "Using -diff and -delta together makes no sense !\n";
210  exit(-1);
211  }
212  if (al.present("-diff"))
213  {
214  tr = differentiate(tr);
215  }
216  if (al.present("-delta"))
217  {
218  EST_Track ntr = tr; // to copy size !;
219  delta(tr,ntr,al.ival("-delta"));
220  tr = ntr;
221  }
222 
223  if (al.present("-c"))
224  {
225  EST_StrList s;
226  EST_Track ntr;
227  EST_IList il;
228  StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
229  StrListtoIList(s, il);
230  extract_channel(tr, ntr, il);
231  tr = ntr;
232  }
233 
234  if (al.present("-start") || al.present("-end")
235  || al.present("-to") || al.present("-from"))
236  extract(tr, al);
237 
238 // tr.assign_map(&LPCTrackMap);
239 // tr.set_space_type("VARI");
240 
241 
242  // optionally rename output tracks before saving
243 
244  if (al.present("-track_names"))
245  {
246  EST_StrList new_names;
247  if(load_StrList(al.val("-track_names"),new_names) != format_ok)
248  {
249  cerr << "Failed to load new track names file." << endl;
250  exit(-1);
251  }
252  /*
253  if (tr.num_channels() != new_names.length())
254  {
255  cerr << "Number of names in output track names file (";
256  cerr << new_names.length() << ") " << endl;
257  cerr << " does not match number of output channels (";
258  cerr << tr.num_channels() << ")" << endl;
259  exit(-1);
260  }
261 
262  EST_Litem *np;
263  int ni;
264  for (np = new_names.head(),ni=0; np; np = np->next(),ni++)
265  tr.set_channel_name(new_names(np),ni);
266  */
267  tr.resize(EST_CURRENT, new_names);
268  }
269 
270  // track_info(tr);
271 
272 /* tr.resize(EST_CURRENT, 10);
273 
274  cout << "new\n";
275  track_info(tr);
276 
277  EST_StrList x;
278  x.append("a");
279  x.append("c");
280  x.append("d");
281 
282 
283 
284  cout << "new\n";
285  track_info(tr);
286 */
287 
288 
289  // Write out file in appropriate format
290 
291  if (al.val("-style",0) == "label")
292  {
293  EST_Relation lab;
294  if (al.present("-t"))
295  track_to_label(tr, lab, al.fval("-t"));
296  else
297  track_to_label(tr, lab);
298  if (al.present("-pos"))
299  change_label(lab, "pos", al.val("-pos"));
300  if (al.present("-neg"))
301  change_label(lab, "neg", al.val("-neg"));
302  if (lab.save(out_file) != write_ok)
303  exit(-1);
304  }
305 /* else if (al.val("-style",0) == "pm")
306  {
307  EST_Relation lab;
308 
309  if (!al.present("-f"))
310  {
311  cerr << "must specify sample rate (with -f) for pm style\n";
312  exit(-1);
313  }
314  int sample_rate = al.ival("-f", 0);
315 
316  track_to_pm(tr, sample_rate, lab);
317 
318  if (lab.save(out_file) != write_ok)
319  exit(-1);
320  }
321 */
322  else
323  {
324  if (tr.save(out_file, al.val("-otype")) != write_ok)
325  exit(-1);
326  }
327 
328  return 0;
329 }
330 
331 void override_lib_ops(EST_Option &a_list, EST_Option &al)
332 {
333  a_list.override_val("ishift", al.val("-s", 0));
334  a_list.override_val("color", al.val("-color", 0));
335  a_list.override_val("in_track_file_type", al.val("-itype", 0));
336  a_list.override_val("out_track_file_type", al.val("-otype", 0));
337  a_list.override_val("tr_to_label_thresh", al.val("-t", 0));
338  a_list.override_fval("time_scale", DEFAULT_TIME_SCALE);
339 
340  if (al.val("-style", 0) == "label")
341  a_list.override_val("lab_file_type", al.val("-otype", 0));
342  if (al.present("-time_scale"))
343  a_list.override_fval("time_scale", al.fval("-time_scale", 1));
344  if (al.present("-time_channel"))
345  a_list.override_val("time_channel", al.sval("-time_channel", 1));
346 }
347 
348 
349 /** @name Making multiple tracks into a single track
350 
351 If multiple input files are specified, by default they are concatenated into
352 the output file.
353 <para>
354 <screen>
355 $ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -o out.tr
356 </screen>
357 </para>
358 <para>
359 In the above example, 4 multi channel input files are converted to
360 one single channel output file. Multi-channel tracks can be
361 concatenated provided they all have the same number of input channels.
362 
363 </para><para>
364 
365 Multiple input files can be made into a multi-channel output file by
366 using the -pc option:
367 
368 </para><para>
369 <screen>
370 $ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -pc longest -o out.tr
371 </screen>
372 </para>
373 <para>
374 The argument to -pc can be either longest, in which case the output
375 track is the length of the longest input file, or first, in which case it
376 is the length of the first input file.
377 
378 */
379 
380 //@{
381 //@}
382 
383 /** @name Extracting channels from multi-channel tracks
384 
385 The -c option is used to specify channels which should be extracted
386 from the input. If the input is a 4 channel track,
387 </para><para>
388 <screen>
389 $ ch_track kdt_m.tr -o a.tr -c "0 2"
390 </screen>
391 </para>
392 <para>
393 will extract the 0th and 2nd channel (counting starts from 0). The
394 argument to -c can be either a single number or a list of numbers
395 (wrapped in quotes).
396 
397  */
398 //@{
399 //@}
400 
401 
402 /** @name Extracting a single region from a track
403 
404 There are several ways of extracting a region of a track. The
405 simplest way is by using the start, end, to and from commands to
406 delimit a sub portion of the input track. For example
407 </para><para>
408 <screen>
409 $ ch_track kdt_010.tr -o small.tr -start 1.45 -end 1.768
410 </screen>
411 </para>
412 <para>
413 extracts a subtrack starting at 1.45 seconds and extending to 1.768 seconds.
414 alternatively,
415 </para><para>
416 <screen>
417 $ ch_track kdt_010.tr -o small.tr -from 50 -to 100
418 </screen>
419 </para>
420 <para>
421 extracts a subtrack starting at 50 frames and extending to 100
422 frames. Times and frames can be mixed in sub-track extraction. The
423 output track will have the same number of channels as the input track.
424 
425 
426 */
427 //@{
428 //@}
429 
430 /** @name Adding headers and format conversion
431 
432 It is usually a good idea for all track files to have headers as this
433 way different files can be handled safely. ch_track provides a means
434 of adding headers to unheadered files. These files are assumed to
435 be ascii floats with one channel per line.
436 
437 The following adds a header to an ascii file.
438 </para>
439 <para>
440 <screen>
441 $ ch_track kdt_010.atr -o kdt_010.h5.tr -otype est -s 0.01
442 </screen>
443 </para>
444 <para>
445 ch_track can change the frame shift of a fixed frame file, or convert
446 a variable frame shift file into a fixed frame shift. At present this
447 is done with a very crude resampling technique and hence the output
448 file may suffer from aliasing distortion.</para><para>
449 
450 
451 Change to a frame spacing of 0.02 seconds:
452 </para><para>
453 <screen>
454 $ ch_track kdt_010.tr -o kdt_010.tr2 -S 0.02
455 </screen>
456 */
457  //@{
458  //@}
459 
460 //@}
461