Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
track_example.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* Date: Fri May 9 1997 */
36  /* ------------------------------------------------------------------- */
37  /* Example of declaration and use of tracks. */
38  /* */
39  /*************************************************************************/
40 
41 
42 #include <iostream>
43 #include <cstdlib>
44 #include "EST_Track.h"
45 #include "EST_Wave.h"
46 #include "EST_sigpr.h"
47 #include "EST_error.h"
48 
49 
50 /** @name EST_Track class example code
51  * @toc
52  * Some examples of track manipulations.
53  *
54  */
55 //@{
56 
57 int main(void)
58 
59 {
60  int i, j;
61 
62  /* This program is designed as an example not as something to run
63  so for testing purpose it simply exists */
64  exit(0);
65  /**@name Initialising and Resizing a Track
66 
67  The constructor functions can be used to create a track with
68  zero frames and channels or a track with a specified number of
69  frames and channels
70  */
71 
72  //@{
73  //@{ code
74  EST_Track tr; // default track declaration
75  EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
76  //@} code
77 
78  /** tracks can be resized at any time:
79  */
80  //@{ code
81  tr.resize(10, 500); // resize track to have 10 frames and 500 channels
82  tr.resize(500, 10); // resize track to have 500 frames and 10 channels
83  //@} code
84 
85  /** by default, resizing preserves values in the track. This
86  may involve copying some information, so if the existing values
87  are not needed, a flag can be set which usually results in
88  quicker resizing
89  */
90  //@{ code
91  tr.resize(250, 5, 0); // throw away any existing values
92  //@} code
93  /** If only the number of channels or the number of frames needs
94  to be changed, this an be done with the following functions:
95  */
96 
97  //@{ code
98  tr.set_num_channels(10); // makes 10 channels, keeps same no of frames
99 
100  tr.set_num_frames(400); // makes 400 frames, keeps same no of channels
101  //@} code
102  /** The preserve flag works in the same way with these functions
103  */
104  //@}
105 
106  /** @name Simple Access
107 
108  Values in the track can be accessed and set by frame
109  number and channel number.
110 
111  The following resizes a track to have 500 frames and 10 channels
112  and fills every position with -5.
113  */
114  //@{
115  //@{ code
116  tr.resize(500, 10);
117 
118  for (i = 0; i < tr.num_frames(); ++i)
119  for (j = 0; j < tr.num_channels(); ++j)
120  tr.a(i, j) = -5.0;
121 
122  //@} code
123 
124  /** A well formed track will have a time value, specified in seconds,
125  for every frame. The time array can be filled directly:
126  */
127  //@{ code
128  for (i = 0; i < tr.num_frames(); ++i)
129  tr.t(i) = (float) i * 0.01;
130  //@} code
131  /** which fills the time array with values 0.01, 0.02,
132  0.03... 5.0. However, A shortcut function is provided for fixed
133  frame spacing:
134  */
135  //@{ code
136  tr.fill_time(0.1);
137 
138  //@} code
139  /** which performs the same operation as above. Frames do not have
140  to be evenly spaced, in pitch synchronous processing the time
141  array holds the time position of each pitch period. In such
142  cases each position in the time array must obviously be set
143  individually.</para><para>
144 
145  Some representations have undefined values during certain
146  sections of the track, for example the F0 value during
147  unvoiced speech.</para><para>
148 
149  The break/value array can be used to specify if a frame has an
150  undefined value.<para></para>. If a frame in this array is 1,
151  that means the amplitude is defined at that point. If 0, the
152  amplitude is undefined. By default, every frame has a value.
153  </para><para>
154 
155  Breaks (undefined values) can be set by <method>set_break()
156  </method>. The following sets every frame from 50 to 99 as a
157  break:
158  */
159  //@{ code
160  for (i = 50; i < 100; ++i)
161  tr.set_break(i);
162  //@} code
163  /** frames can be turned back to values as follows:
164  */
165  //@{ code
166  for (i = 50; i < 100; ++i)
167  tr.set_value(i);
168  //@} code
169  /** It is up to individual functions to decide how to interpret breaks.
170  </para><para>
171  A frame's status can be checked as follows:
172  */
173  //@{ code
174  if (tr.val(60))
175  cout << "Frame 60 is not a break\n";
176 
177  if (tr.track_break(60))
178  cout << "Frame 60 is a break\n";
179  //@} code
180  //@}
181 
182  /** @name Naming Channels
183  @id tr-example-naming-channels
184 
185  While channels can be accessed by their index, it is often useful
186  to give them names and refer to them by those names.
187 
188  The set_channel_name() function sets the name of a single channel:
189  */
190  //@{
191  //@{ code
192  tr.set_channel_name("F0", 0);
193  tr.set_channel_name("energy", 1);
194  //@} code
195 
196  /** An alternative is to use a predefined set of channel names
197  stored in a <emphasis>map</emphasis>.A track map
198  is simply a String List strings which describe a channel name
199  configuration. The <method>resize</method> function can take
200  this and resize the number of channels to the number of channels
201  indicated in the map, and give each channel its name from the
202  map. For example:
203  */
204  //@{ code
205  EST_StrList map;
206  map.append("F0");
207  map.append("energy");
208 
209  tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
210  //@} code
211 
212  /** A convention is used for channels which comprise
213  components of a multi-dimensional analysis such as
214  cepstra. In such cases the channels are named
215  <replaceable>TYPE_I</replaceable>. The last coefficient is
216  always names <replaceable>TYPE_N</replaceable> regardless of
217  the number of coefficients. This is very useful in extracting
218  a set of related channels without needing to know the order
219  of the analysis.
220 
221  For example, a track map might look like:
222 
223  */
224  //@{ code
225 
226  map.clear();
227  map.append("F0");
228  map.append("energy");
229 
230  map.append("cep_0");
231  map.append("cep_1");
232  map.append("cep_2");
233  map.append("cep_3");
234  map.append("cep_4");
235  map.append("cep_5");
236  map.append("cep_6");
237  map.append("cep_7");
238  map.append("cep_N");
239 
240  tr.resize(500, map); // makes a 11 channel track and sets the names
241  //@} code
242 
243  /** This obviously gets unwieldy quite quickly, so the mapping
244  mechanism provides a short hand for multi-dimensional data.
245 
246  */
247 
248  //@{ code
249  map.clear();
250  map.append("F0");
251  map.append("energy");
252 
253  map.append("$cep-0+8");
254 
255  tr.resize(500, map); // does exactly as above
256  //@} code
257 
258  /** Here $ indicates the special status, "cep" the name of the
259  coefficients, "-0" that the first is number 0 and "+8" that
260  there are 8 more to follow.
261  */
262 
263  //@}
264 
265 
266  /** @name Access single frames or single channels.
267 
268  @id tr-example-frames-and-channels
269 
270  Often functions perform their operations on only a single
271  frame or channel, and the track class provides a general
272  mechanism for doing this.
273 
274  Single frames or channels can be accessed as EST_FVectors:
275  Given a track with 500 frames and 10 channels, the 50th frame
276  can be accessed as:
277  */
278  //@{
279  //@{ code
280  EST_FVector tmp_frame;
281 
282  tr.frame(tmp_frame, 50);
283  //@} code
284  /** now tmp_frame is 10 element vector, which is
285  a window into tr: any changes to the contents of tmp_frame will
286  change tr. tmp_frame cannot be resized. (This operation can
287  be thought in standard C terms as tmp_frame being a pointer
288  to the 5th frame of tr).
289  </para> <para>
290  Likewise with channels:
291  */
292  //@{ code
293  EST_FVector tmp_channel;
294 
295  tr.channel(tmp_channel, 5);
296  //@} code
297  /** Again, tmp_channel is 500 element vector, which is
298  a window into tr: any changes to the contents of tmp_channel will
299  change tr. tmp_channel cannot be resized.
300  </para><para>
301  Channels can also be extracted by name:
302  */
303  //@{ code
304  tr.channel(tmp_channel, "energy");
305  //@} code
306  /** not all the channels need be put into the temporary frame.
307  Imagine we have a track with a F0 channel,a energy channel and
308  10 cepstrum channels. The following makes a frame from the
309  50th frame, which only includes the cepstral information in
310  channels 2 through 11 */
311  //@{ code
312  tr.frame(tmp_frame, 50, 2, 9);
313  //@} code
314  /** Likewise, the 5th channel with only the last 100 frames can be set up
315  as: */
316  //@{ code
317  tr.channel(tmp_channel, 5, 400, 100);
318  //@} code
319  //@}
320  /** @name Access multiple frames or channels.
321  @id tr-example-sub-tracks
322  In addition to extracting single frames and channels, multiple
323  frame and channel portions can be extracted in a similar
324  way. In the following example, we make a sub-track sub, which
325  points to the entire cepstrum portion of a track (channels 2
326  through 11)
327  */
328  //@{
329  //@{ code
330  EST_Track sub;
331 
332  tr.sub_track(sub, 0, EST_ALL, 2, 9);
333 
334  //@} code
335 
336  /** <parameter>sub</parameter> behaves exactly like a normal
337  track in every way, except that it cannot be resized. Its
338  contents behave like a point into the designated portion of
339  <parameter>tr</parameter>, so changing
340  <parameter>sub</parameter> will change<parameter>
341  tr</parameter>.
342 
343  </para><para> The first argument is the
344  <parameter>sub</parameter> track. The second states the start
345  frame and the total number of frames required. EST_ALL is a
346  special constant that specifies that all the frames are
347  required here. The next argument is the start channel number
348  (remember channels are numbered from 0), and the last argument
349  is the total number of channels required. </para><para>
350 
351  This facility is particularly useful for using standard
352  signal processing functions efficiently. For example,
353  the <function>melcep</function> in the signal processing library
354  takes a waveform and produces a mel-scale cepstrum. It determines
355  the order of the cepstral analysis by the number of channels in
356  the track it is given, which has already been allocated to have
357  the correct number of frames and channels.
358 
359  </para><para> The following will process the waveform
360  <parameter>sig</parameter>, produce a 10th order mel cepstrum
361  and place the output in <parameter>sub</parameter>. (For
362  explanation of the other options see
363  <function>melcep</function> */
364  //@{ code
365  EST_Wave sig;
366 
367  melcep(sig, sub, 1.0, 20, 22);
368  //@} code
369 
370  /** because we have made<parameter>sub</parameter> a window
371  into<parameter> tr</parameter>, the melcep function writes its
372  output into the correct location, i.e. channels 2-11 of tr. If
373  it were no for the sub_track facility, either a separate track
374  of the right size would be passed into melcep and then it
375  would be copied into tr (wasteful), or else tr would be passed
376  in and other arguments would have to specify which channels
377  should be written to (messy). </para><para>
378 
379  Sub-tracks can also be set using channel names. The
380  following example does exactly as above, but is referenced by
381  the name of the first channel required and the number of
382  channels to follow: */
383  //@{ code
384 
385  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
386  //@} code
387  /** and this specifies the end by a string also:
388  */
389  //@{ code
390  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
391  //@} code
392  /** sub_tracks can be any set of continuous frames and
393  channels. For example if a word started at frame 43 and ended
394  and frame 86, the following would set a sub track to that
395  portion: */
396  //@{ code
397 
398  tr.sub_track(sub, 47, 39, "cep_0", "cep_N");
399 
400  //@} code
401 
402  /** We can step through the frames of a Track using a standard
403  * iterator. The frames are returned as one-frame sub-tracks.
404  */
405 
406  //@{ code
407  EST_Track::Entries frames;
408 
409  // print out the time of every 50th track
410  cout << "Times:";
411 
412  for (frames.begin(tr); frames; ++frames)
413  {
414  const EST_Track &frame = *frames;
415  if (frames.n() % 50 ==0)
416  cout << " " << frames.n() << "[" << frame.t() << "]";
417  }
418  cout << "\n";
419 
420  //@} code
421 
422  /** The <function>channel</function>, <function>frame</function>
423  and <function>sub_track</function> functions are most commonly
424  used to write into a track using a convenient
425  sub-portion. Sometimes, however a simple copy is required
426  whose contents can be written without affecting the original.
427 
428  The <member>copy_cub_track</member> function does this */
429  //@{ code
430  EST_Track tr_copy;
431 
432 // tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
433  //@} code
434 
435  /** Individual frames and channels can be copied out into
436  pre-allocated float * arrays as follows:
437  */
438  //@{ code
439  float *channel_buf, *frame_buf;
440  channel_buf = new float[tr.num_frames()];
441  frame_buf = new float[tr.num_channels()];
442 
443  tr.copy_channel_out(5, channel_buf); // copy channel 5 into channel_buf
444  tr.copy_frame_out(43, frame_buf); // copy frame 4 into frame_buf
445  //@} code
446 
447  /** Individual frames and channels can be copied into the track
448  from float * arrays as follows:
449  */
450  //@{ code
451  tr.copy_channel_in(5, channel_buf); // copy channel_buf into channel 5
452  tr.copy_frame_in(43, frame_buf); // copy frame_buf into frame 4
453  //@} code
454  //@}
455 
456 
457  /** @name Auxiliary Channels
458  Auxiliary channels are used for storing frame information other than
459  amplitude coefficients, for example voicing decisions and points of
460  interest in the track.
461 
462  Auxiliary channels always have the same number of frames as the
463  amplitude channels. They are resized by assigning names to the
464  channels that need to be created:
465  */
466  //@{
467  //@{ code
468 
469 
470  EST_StrList aux_names;
471 
472  aux_names.append("voicing");
473  aux_names.append("join_points");
474  aux_names.append("cost");
475 
476  tr.resize_aux(aux_names);
477 
478  //@} code
479  /** The following fills in these three channels with some values:
480  */
481  //@{ code
482 
483  for (i = 0; i < 500; ++i)
484  {
485  tr.aux(i, "voicing") = i;
486  tr.aux(i, "join_points") = EST_String("stuff");
487  tr.aux(i, "cost") = 0.111;
488  }
489  //@} code
490  //@}
491 
492  /** @name File I/O
493  Tracks in various formats can be saved and loaded:
494 
495  Save as a HTK file:
496  */
497  //@{
498  //@{ code
499  if (tr.save("tmp/track.htk", "htk") != write_ok)
500  EST_error("can't save htk file\n");
501  //@} code
502  /** Save as a EST file:
503  */
504  //@{ code
505  if (tr.save("tmp/track.est", "est") != write_ok)
506  EST_error("can't save est file\n");
507  //@} code
508  /** Save as an ascii file:
509  */
510  //@{ code
511  if (tr.save("tmp/track.ascii", "ascii") != write_ok)
512  EST_error("can't save ascii file\n");
513  //@} code
514  /** The file type is automatically determined from the file's
515  header during loading:
516  */
517  //@{ code
518 
519  EST_Track tr2;
520  if (tr2.load("tmp/track.htk") != read_ok)
521  EST_error("can't reload htk\n");
522  //@} code
523 
524  /** If no header is found, the function assumes the
525  file is ascii data, with a fixed frame shift, arranged with rows
526  representing frames and columns channels. In this case, the
527  frame shift must be specified as an argument to this function:
528  */
529  //@{ code
530  if (tr.load("tmp/track.ascii", 0.01) != read_ok)
531  EST_error("can't reload ascii file\n");
532  //@} code
533  //@}
534 
535  exit(0);
536 }
537 
538 //@}
539 
540 
541 
542 
543 
544 
545