Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
ch_wave_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : April 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Change EST_Wave utility main */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <cmath>
42 #include "EST_Wave.h"
43 #include "EST_cmd_line.h"
44 #include "EST_cmd_line_options.h"
45 #include "EST_sigpr.h"
46 #include "EST_wave_aux.h"
47 #include "EST.h"
48 
49 void wave_extract_channel(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
50 
51 
52 void extract_channels(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
53 
54 /** @name <command>ch_wave</command> <emphasis>Audio file manipulation</emphasis>
55  @id ch_wave_manual
56  * @toc
57  */
58 
59 //@{
60 
61 
62 /**@name Synopsis
63  */
64 //@{
65 
66 //@synopsis
67 
68 /**
69 ch_wave is used to manipulate the format of a waveform
70 file. Operations include:
71 
72 <itemizedlist>
73 <listitem><para>file format conversion</para></listitem>
74 <listitem><para>resampling (changing the sampling frequency)</para></listitem>
75 <listitem><para>byte-swapping</para></listitem>
76 <listitem><para>making multiple input files into a single multi-channel output file</para></listitem>
77 <listitem><para>making multiple input files into a single single-channel output file</para></listitem>
78 <listitem><para>extracting a single channel from a multi-channel waveform</para></listitem>
79 <listitem><para>scaling the amplitude of the waveform</para></listitem>
80 <listitem><para>low pass and high pass filtering</para></listitem>
81 <listitem><para>extracting a time-delimited portion of the waveform</para></listitem>
82 </itemizedlist>
83 
84 ch_wave is an executable program that serves as a wrap-around for the
85 EST_Wave class and the basic wave manipulation functions. More
86 advanced waveform processing is performed by the signal processing library.
87 
88 */
89 
90 //@}
91 
92 /**@name OPTIONS
93  */
94 //@{
95 
96 //@options
97 
98 //@}
99 
100 
101 int main (int argc, char *argv[])
102 {
103  EST_Wave sig, sigload;
104  EST_String in_file("-"), out_file("-"), op_file(""), test;
105  EST_Option al;
106  EST_StrList files;
107  EST_Litem *p;
108 
109 
110  parse_command_line
111  (argc, argv,
112  EST_String("[input file0] [input file1] ... -o [output file]\n")+
113  "Summary: change/copy/combine waveform files\n"+
114  "use \"-\" to make input and output files stdin/out\n"+
115  "-h Options help\n\n"+
116  options_wave_input()+
117  options_wave_output()+
118  "-scale <float> Scaling factor. Increase or descrease the amplitude\n"
119  " of the whole waveform by the factor given\n\n"
120 
121  "-scaleN <float> Scaling factor with normalization. \n"
122  " The waveform is scaled to its maximum level, after which \n"
123  " it is scaled by the factor given\n\n"
124 
125  "-lpfilter <int> Low pass filter, with cutoff frequency in Hz \n"
126  " Filtering is performed by a FIR filter which is built at run \n"
127  " time. The order of the filter can be given by -forder. The \n"
128  " default value is 199\n\n"
129 
130  "-hpfilter <int> High pass filter, with cutoff frequency in Hz \n"
131  " Filtering is performed by a FIR filter which is \n"
132  " built at run time. The order of the filter can \n"
133  " be given by -forder. The default value is 199.\n\n"
134 
135  "-forder <int> Order of FIR filter used for lpfilter and \n"
136  " hpfilter. This must be ODD. Sensible values range \n"+
137  " from 19 (quick but with a shallow rolloff) to 199 \n"
138  " (slow but with a steep rolloff). The default is 199.\n\n"
139 
140  "-fafter Do filtering after other operations such as \n"
141  " resampling (default : filter before other operations)\n\n"
142 
143  "-info Print information about file and header. \n"
144  " This option gives useful information such as file \n"
145  " length, sampling rate, number of channels etc\n"
146  " No output is produced\n\n"
147 
148  "-add A new single channel waveform is created by adding \n"
149  " the corresponding sample points of each input waveform\n\n"
150 
151  "-pc <string> Combine input waveforms to form a single \n"
152  " multichannel waveform. The argument to this option controls \n"
153  " how long the new waveform should be. If the option \n"
154  " is LONGEST, the output wave if the length of the \n"
155  " longest input wave and shorter waves are padded with \n"
156  " zeros at the end. If the option is FIRST, the length \n"
157  " of the new waveform is the length of the first file \n"
158  " on the command line, and subsequent waves are padded \n"
159  " or cut to this length\n\n"
160 
161  "-key <ifile> Label file designating subsections, for use with \n"
162  " -divide. The KEYLAB file is a label file which specifies \n"
163  " where chunks (such as individual sentences) in \n"
164  " a waveform begin and end. See section of wave extraction.\n\n"
165 
166  "-divide Divide a single input waveform into multiple output \n"
167  " waveforms. Each output waveform is extracted from the \n"
168  " input waveform by using the KEYLAB file, which \n"
169  " specifies the start and stop times for each chunk. \n"
170  " The output files are named according to the filename \n"
171  " in the KEYLAB file, with extension given by -ext. See \n"
172  " section on wave extraction\n\n"
173 
174  "-ext <string> File extension for divided waveforms\n\n"
175 
176  "-extract <string> Used in conjunction with -key to extract a \n"
177  " single section of waveform from the input \n"
178  " waveform. The argument is the name of a file given \n"
179  " in the file column of the KEYLAB file.\n",
180  files, al);
181 
182  out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
183 
184  // There will always be at least one (or stdin)
185  // The first is dealt specially in case its *way* big
186  if (read_wave(sig, files.first(), al) != format_ok)
187  exit(-1);
188  if (al.present("-info"))
189  wave_info(sig);
190  // concat or parallelize remaining input files
191 
192  if (files.length() > 1)
193  {
194  for (p= files.head()->next(); p != 0; p=p->next())
195  {
196  if (read_wave(sigload, files(p), al) != format_ok)
197  exit(-1);
198  if (al.present("-info"))
199  wave_info(sigload);
200  else if (al.present("-pc"))
201  {
202  if ((al.val("-pc") == "longest") &&
203  (sig.num_samples() < sigload.num_samples()))
204  sig.resize(sigload.num_samples());
205  else /* "first" or sig is longer */
206  sigload.resize(sig.num_samples());
207  sig |= sigload;
208  }
209  else if (al.present("-add"))
210  add_waves(sig, sigload);
211  else
212  sig += sigload;
213  }
214  }
215 
216  if (al.present("-info"))
217  exit(0); // done what I've been asked to so stop
218 
219  // All input files are now in a single wave called sig
220 
221  // default is to filter before any resampling etc.
222  // (this may cause problems for multiplexed data !)
223  if(!al.present("-fafter")){
224  if(al.present("-lpfilter"))
225  FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
226  if(al.present("-hpfilter"))
227  FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
228  }
229 
230  if (al.present("-c")) // extract a channel from a multi-channel wave
231  {
232  EST_StrList s;
233  EST_IList il;
234  EST_Wave nsig;
235  StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
236  StrListtoIList(s, il);
237  extract_channels(nsig, sig, il);
238  sig = nsig;
239  }
240 
241  if (al.present("-F")) // resample
242  sig.resample(al.ival("-F"));
243 
244  if (al.present("-scale")) // rescale
245  {
246  float scale = al.fval("-scale", 0);
247  sig.rescale(scale);
248  }
249  else if (al.present("-scaleN")) // rescale
250  {
251  float scale = al.fval("-scaleN", 0);
252  if ((scale < 0) || (scale > 1.0))
253  {
254  cerr << "ch_wave: -scaleN must be in range 0 to 1" << endl;
255  exit(-1);
256  }
257  sig.rescale(scale,1);
258  }
259 
260  EST_Relation key;
261 
262  if (al.present("-divide"))
263  {
264  EST_WaveList wl;
265  if (!al.present("-key"))
266  {
267  cerr << "Must have key file specified when dividing waveform\n";
268  exit (-1);
269  }
270  if (key.load(al.val("-key")) != format_ok)
271  exit(-1);
272 
273  if (wave_divide(wl, sig, key, al.val("-ext", 0)) == -1)
274  exit(0);
275  for (p = wl.head(); p; p = p->next())
276  wl(p).save(wl(p).name(), al.val("-otype", 0));
277  exit(0);
278  }
279  else if (al.present("-extract"))
280  {
281  EST_Wave e;
282  if (!al.present("-key"))
283  {
284  cerr << "Must have key file specified when dividing waveform\n";
285  exit (-1);
286  }
287  if (key.load(al.val("-key")) != format_ok)
288  exit(-1);
289 
290  if (wave_extract(e, sig, key, al.val("-extract")) == -1)
291  exit (-1);
292  sig = e;
293  }
294 
295  // if we are filtering after other operations
296  if(al.present("-fafter")){
297  if(al.present("-lpfilter"))
298  FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
299  if(al.present("-hpfilter"))
300  FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
301  }
302 
303  write_wave(sig, out_file, al);
304  return 0;
305 }
306 
307 /** @name Making multiple waves into a single wave
308 
309 If multiple input files are specified, by default they are concatenated into
310 the output file.
311 </para>
312 <para>
313 <screen>
314 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -o out.wav
315 </screen>
316 </para>
317 <para>
318 In the above example, 4 single channel input files are converted to
319 one single channel output file. Multi-channel waveforms can also be
320 concatenated provided they all have the same number of input channels.
321 
322 </para><para>
323 
324 Multiple input files can be made into a multi-channel output file by
325 using the -pc option:
326 
327 </para><para>
328 <screen>
329 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -pc LONGEST -o out.wav
330 </screen>
331 </para>
332 <para>
333 The argument to -pc can either be LONGEST, in which case the output
334 waveform is the length of the longest input file, or FIRST in which it
335 is the length of the first input file.
336 
337 */
338 
339 //@{
340 //@}
341 
342 /** @name Extracting channels from multi-channel waves
343 
344 The -c option is used to specify channels which should be extracted
345 from the input. If the input is a 4 channel wave,
346 </para><para>
347 <screen>
348 $ ch_wave kdt_m.wav -o a.wav -c "0 2"
349 </screen>
350 </para>
351 <para>
352 will extract the 0th and 2nd channel (counting starts from 0). The
353 argument to -c can be either a single number or a list of numbers
354 (wrapped in quotes)
355 
356  */
357 //@{
358 //@}
359 
360 
361 /** @name Extracting a single region from a waveform
362 
363 There are several ways of extracting a region of a waveform. The
364 simplest way is by using the start, end, to and from commands to
365 delimit a sub portion of the input wave. For example
366 </para><para>
367 <screen>
368 $ ch_wave kdt_010.wav -o small.wav -start 1.45 -end 1.768
369 </screen>
370 </para>
371 <para>
372 extracts a subwave starting at 1.45 seconds and extending to 1.768 seconds.
373 
374 alternatively,
375 </para><para>
376 <screen>
377 $ ch_wave kdt_010.wav -o small.wav -from 5000 -to 10000
378 </screen>
379 </para>
380 <para>
381 extracts a subwave starting at 5000 samples and extending to 10000
382 samples. Times and samples can be mixed in sub-wave extraction. The
383 output waveform will have the same number of channels as the input
384 waveform.
385 
386 */
387 //@{
388 //@}
389 
390 /** @name Extracting multiple regions from a waveform
391 
392 Multiple regions can be extracted from a waveform, but as it would be
393 too complicated to specify the start and end points on the command
394 line, a label file with start and end points, and file names is used.
395 
396 The file is called a key label file and in xwaves label format looks
397 like:
398 </para>
399 <para>
400 <screen>
401 separator ;
402 #
403 0.308272 121 sil ; file kdt_010.01 ;
404 0.440021 121 are ; file kdt_010.02 ;
405 0.512930 121 your ; file kdt_010.03 ;
406 0.784097 121 grades ; file kdt_010.04 ;
407 1.140969 121 higher ; file kdt_010.05 ;
408 1.258647 121 or ; file kdt_010.06 ;
409 1.577145 121 lower ; file kdt_010.07 ;
410 1.725516 121 than ; file kdt_010.08 ;
411 2.315186 121 nancy's ; file kdt_010.09 ;
412 </screen>
413 </para>
414 <para>
415 Each line represents one region. The first column is the end time of
416 that region and the start time of the next. The next two columns are
417 colour and an arbitrary name, and the filename in which the output
418 waveform is to be stored is kept as a field called file in the last column.
419 In this example, each region corresponds to a single word in the file.
420 
421 If the above file is called "kdt_010.words.keylab", the command:
422 </para>
423 <para>
424 <screen>
425 $ ch_wave kdt_010.wav -key kdt_010.words.keylab -ext .wav -divide
426 </screen>
427 </para>
428 <para>
429 will divide the input waveform into 9 output waveforms called
430 kdt_010.01.wav, kdt_010.02.wav ... kdt_010.09.wav. The -ext option
431 specifies the extension of the new waveforms, and the -divide command
432 specifies that division of the entire waveform is to take place.
433 
434 If only a single file is required the -extract option can be used, in
435 which case its argument is the filename required.
436 </para>
437 <para>
438 <screen>
439 $ ch_wave kdt_010.wav -key kdt_010.words.keylab -ext .wav -extract kdt_010.03 \
440  -o kdt_010.03.wav
441 </screen>
442 </para>
443 <para>
444 Note that an output filename should be specified with this option.
445 */
446 //@{
447 //@}
448 
449 /** @name Adding headers and format conversion
450 
451 It is usually a good idea for all waveform files to have headers as
452 this way different byte orders, sampling rates etc can be handled
453 safely. ch_wave provides a means of adding headers to raw files.
454 
455 The following adds a header to a file of 16 bit shorts
456 </para>
457 <para>
458 <screen>
459 $ ch_wave kdt_010.raw1 -o kdt_010.h1.wav -otype nist -f 16000 -itype raw
460 </screen>
461 </para>
462 <para>
463 The following downsamples the input to 8 kHz
464 </para>
465 <para>
466 <screen>
467 $ ch_wave kdt_010.raw1 -o kdt_010.h2.wav -otype nist -f 16000 \
468  -F 8000 -itype raw
469 </screen>
470 </para>
471 <para>
472 The following takes an 8 kHz ulaw input file and produces a 16 bit, 20 kHz output file:
473 </para>
474 <para>
475 <screen>
476 $ ch_wave kdt_010.raw2 -o kdt_010.h3.wav -otype nist -istype ulaw \
477  -f 8000 -F 20000 -itype raw
478 </screen>
479 */
480  //@{
481  //@}
482 
483 //@}