docs/speech_tools-2.4.0/ch__track__main_8cc_source.html

/*************************************************************************/

/*                                                                       */

/*                Centre for Speech Technology Research                  */

/*                     University of Edinburgh, UK                       */

/*                    Copyright (c) 1994,1995,1996                       */

/*                        All Rights Reserved.                           */

/*                                                                       */

/*  Permission is hereby granted, free of charge, to use and distribute  */

/*  this software and its documentation without restriction, including   */

/*  without limitation the rights to use, copy, modify, merge, publish,  */

/*  distribute, sublicense, and/or sell copies of this work, and to      */

/*  permit persons to whom this work is furnished to do so, subject to   */

/*  the following conditions:                                            */

/*   1. The code must retain the above copyright notice, this list of    */

/*      conditions and the following disclaimer.                         */

/*   2. Any modifications must be clearly marked as such.                */

/*   3. Original authors' names are not deleted.                         */

/*   4. The authors' names are not used to endorse or promote products   */

/*      derived from this software without specific prior written        */

/*      permission.                                                      */

/*                                                                       */

/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */

/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */

/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */

/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */

/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */

/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */

/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */

/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */

/*  THIS SOFTWARE.                                                       */

/*                                                                       */

/*************************************************************************/

/*                      Author :  Paul Taylor                            */

/*                      Date   :  June 1994                              */

/*-----------------------------------------------------------------------*/

/*                  EST_Track file manipulation program                  */

/*                                                                       */

/*=======================================================================*/


#include "EST.h"

#include "EST_cmd_line_options.h"


// Default value written to the "time_scale" setting by override_lib_ops()
// when the user does not supply -time_scale.
#define DEFAULT_TIME_SCALE 0.001


// Forward declarations of helpers defined outside this file.

// Used by main() on the tokenised "-c" argument to fill an integer list
// (presumably the channel indices -- confirm against the definition).
int StrListtoIList(EST_StrList &s, EST_IList &il);

// Used by main() for "-c": called with the working track, an empty track
// and the integer list; main() then replaces the working track with `nt`.
void extract_channel(EST_Track &orig, EST_Track &nt, EST_IList &ch_list);


// Used by main() when "-otype snns" is selected; receives the output file
// name and two track lists built from the first and second input tracks.
EST_write_status save_snns_pat(const EST_String filename,

                   EST_TrackList &inpat, EST_TrackList &outpat);


// Used by main() to load the input files named on the command line into
// `tlist`; main() treats any result other than read_ok as failure.
EST_read_status read_TrackList(EST_TrackList &tlist, EST_StrList &files,

                   EST_Option &al);


// Used by main() when any of -start, -end, -from or -to is given.
void extract(EST_Track &tr, EST_Option &al);

/** @name <command>ch_track</command> <emphasis>Track file manipulation</emphasis>

  * @id ch-track-manual

  * @toc

 */


//@{


/**@name Synopsis

  */

//@{


//@synopsis


/**

ch_track is used to manipulate the format of a track

file. Operations include:


<itemizedlist>

<listitem><para>file format conversion</para></listitem>

<listitem><para>smoothing</para></listitem>

<listitem><para>changing the frame spacing of a track (resampling)</para></listitem>

<listitem><para>producing differentiated and delta tracks</para></listitem>

<listitem><para>Using a threshold to convert a track file to a label file</para></listitem>


<listitem><para>making multiple input files into a single multi-channel output file</para></listitem>

<listitem><para>extracting a single channel from a multi-channel track</para></listitem>

<listitem><para>extracting a time-delimited portion of the waveform</para></listitem>

</itemizedlist>


 */


//@}


/**@name Options

  */

//@{


//@options


//@}


int main(int argc, char *argv[])

{

    EST_String in_file("-"), out_file("-");

    EST_Option al, settings;

    EST_String fname, ftmp;

    EST_StrList files;

    EST_Track tr;

    EST_TrackList trlist;

    EST_Litem *p;


    parse_command_line(

    argc, argv,

    EST_String("[input file] -o [output file] [options]\n")+

    "Summary: change/copy track files\n"

    "use \"-\" to make input and output files stdin/out\n"

    "-h               Options help\n"+

    options_track_input()+ "\n"+

    options_track_output()+ "\n"

    "-info  Print information about file and header. \n"

    "    This option gives useful information such as file \n"

    "    length, file type, channel names. No output is produced\n\n"

    "-track_names <string> \n"

    "    File containing new names for output channels\n\n"

    "-diff Differentiate contour. This performs simple \n"

    "    numerical differentiation on the contour by \n"

    "    subtracting the amplitude of the current frame \n"

        "    from the amplitude of the next. Although quick, \n"

    "    this technique is crude and not recommende as the \n"

    "    estimation of the derivate is done on only one point\n\n"

    "-delta <int> Make delta coefficients (better form of differentiate).\n"

    "    The argument to this option is the regression length of \n"

    "    of the delta calculation and can be between 2 and 4 \n\n"

    "-sm <float> Length of smoothing window in seconds. Various types of \n"

    "    smoothing are available for tracks. This options specifies \n"

    "    length of the smooting window which effects the degree of \n"

    "    smoothing, i.e. a longer value means more smoothing \n\n"

    "-smtype <string>  Smooth type, median or mean\n"

    "-style <string>  Convert track to other form.  Currently only one form \n"

    "   \"label\" is supported. This uses a specified cut off to \n"

    "    make a label file, with two labels, one for above the \n"

    "    cut off (-pos) and one for below (-neg)\n\n"

    "-t <float> threshold for track to label conversion \n"

    "-neg <string> Name of negative label in track to label conversion \n"

    "-pos <string> Name of positive label in track to label conversion \n"

    "-pc <string>  Combine given tracks in parallel.  If option \n"

    "     is longest, pad shorter tracks to longest, else if \n"

    "     first pad/cut to match first input track \n" +

    options_track_filetypes_long(),

    files, al);


/*redundant options

    "-time_channel <string>\n"+

    "        Which track in track file holds pitchmark times\n"+

    "-time_scale <float>    \n"+

    "        Scale of pitchmarks (default 0.001 = milliseconds)\n"+

*/


    override_lib_ops(settings, al);

    out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";


    EST_TokenStream ts;


//    ts.open(files.first());

//    tr.load(ts);

//    cout << tr;


    if (read_TrackList(trlist, files, al) != read_ok)

    exit(0);


    if (files.length() == 0)

    {

    cerr << argv[0] << ": no input files specified\n";

    exit(-1);

    }


    if (al.present("-info"))

    {

    for (p = trlist.head(); p; p = p->next())

        track_info(trlist(p));

    exit(0);

    }


    if (al.present("-pc"))       // parallelize them

    ParallelTracks(tr, trlist, al.val("-pc"));


    else if (al.val("-otype", 0) == "snns")

    {   // sometime this will generalise for multiple input files

    EST_TrackList inpat, outpat;

    inpat.append(trlist.nth(0));

    outpat.append(trlist.nth(1));

    save_snns_pat(out_file, inpat, outpat);

    exit(0);

    }

    else                         // concatenate them

    {

    tr.resize(0, tr.num_channels());

    // Reorg -- fix += to resize to largest num_channels (with warning)

    for (p = trlist.head(); p; p = p->next())

        tr += trlist(p);

    }


    if (al.present("-S"))

    tr.sample(al.fval("-S"));

    if (al.present("-sm"))

    {

    track_smooth(tr, al.fval("-sm"),al.val("-smtype"));

    }


    if (al.present("-diff") && al.present("-delta"))

    {

    cerr << "Using -diff and -delta together makes no sense !\n";

    exit(-1);

    }

    if (al.present("-diff"))

    {

    tr = differentiate(tr);

    }

    if (al.present("-delta"))

    {

    EST_Track ntr = tr; // to copy size !;

    delta(tr,ntr,al.ival("-delta"));

    tr = ntr;

    }


    if (al.present("-c"))

    {

    EST_StrList s;

    EST_Track ntr;

    EST_IList il;

    StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma

    StrListtoIList(s, il);

    extract_channel(tr, ntr, il);

    tr = ntr;

    }


    if (al.present("-start") || al.present("-end")

    || al.present("-to") || al.present("-from"))

    extract(tr, al);


//    tr.assign_map(&LPCTrackMap);

//    tr.set_space_type("VARI");


    // optionally rename output tracks before saving


    if (al.present("-track_names"))

    {

    EST_StrList new_names;

    if(load_StrList(al.val("-track_names"),new_names) != format_ok)

    {

        cerr << "Failed to load new track names file." << endl;

        exit(-1);

    }

    /*

    if (tr.num_channels() != new_names.length())

    {

        cerr << "Number of names in output track names file (";

        cerr << new_names.length() << ") " << endl;

        cerr << " does not match number of output channels (";

        cerr << tr.num_channels() << ")" << endl;

        exit(-1);

    }


    EST_Litem *np;

    int ni;

    for (np = new_names.head(),ni=0; np; np = np->next(),ni++)

        tr.set_channel_name(new_names(np),ni);

    */

    tr.resize(EST_CURRENT, new_names);

    }


    // track_info(tr);


/*    tr.resize(EST_CURRENT, 10);


    cout << "new\n";

    track_info(tr);


    EST_StrList x;

    x.append("a");

    x.append("c");

    x.append("d");


    cout << "new\n";

    track_info(tr);

*/


    // Write out file in appropriate format


    if (al.val("-style",0) == "label")

    {

    EST_Relation lab;

    if (al.present("-t"))

        track_to_label(tr, lab, al.fval("-t"));

    else

        track_to_label(tr, lab);

    if (al.present("-pos"))

        change_label(lab, "pos", al.val("-pos"));

    if (al.present("-neg"))

        change_label(lab, "neg", al.val("-neg"));

    if (lab.save(out_file) != write_ok)

        exit(-1);

    }

/*    else if (al.val("-style",0) == "pm")

    {

    EST_Relation lab;


    if (!al.present("-f"))

    {

        cerr << "must specify sample rate (with -f) for pm style\n";

        exit(-1);

    }

    int sample_rate = al.ival("-f", 0);


    track_to_pm(tr, sample_rate, lab);


    if (lab.save(out_file) != write_ok)

        exit(-1);

    }

*/

    else

    {

    if (tr.save(out_file, al.val("-otype")) != write_ok)

        exit(-1);

    }


    return 0;

}


// Transfer command-line options from `al` into the settings list `a_list`.
//
// Five options are copied unconditionally (each lookup passes 0 as the
// second argument to val()); "time_scale" is first set to
// DEFAULT_TIME_SCALE and then replaced if -time_scale was supplied.
// "lab_file_type" is only set for -style label, and "time_channel" only
// when -time_channel was supplied.
void override_lib_ops(EST_Option &a_list, EST_Option &al)
{
    // {setting name, command-line flag}, applied in this order.
    static const char *const direct_copies[][2] = {
        { "ishift",              "-s"     },
        { "color",               "-color" },
        { "in_track_file_type",  "-itype" },
        { "out_track_file_type", "-otype" },
        { "tr_to_label_thresh",  "-t"     },
    };
    const int n_copies = sizeof(direct_copies) / sizeof(direct_copies[0]);

    for (int i = 0; i < n_copies; ++i)
    {
        a_list.override_val(direct_copies[i][0],
                            al.val(direct_copies[i][1], 0));
    }

    // Start from the default; an explicit -time_scale replaces it below.
    a_list.override_fval("time_scale", DEFAULT_TIME_SCALE);

    // When producing labels, -otype names the label file format.
    if (al.val("-style", 0) == "label")
    {
        a_list.override_val("lab_file_type", al.val("-otype", 0));
    }

    if (al.present("-time_scale"))
    {
        a_list.override_fval("time_scale", al.fval("-time_scale", 1));
    }

    if (al.present("-time_channel"))
    {
        a_list.override_val("time_channel", al.sval("-time_channel", 1));
    }
}


/** @name Making multiple tracks into a single track


If multiple input files are specified, by default they are concatenated into

the output file.

<para>

<screen>

$ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -o out.tr

</screen>

</para>

<para>

In the above example, 4 multi channel input files are converted to

one single channel output file. Multi-channel tracks can be

concatenated provided they all have the same number of input channels.


</para><para>


Multiple input files can be made into a multi-channel output file by

using the -pc option:


</para><para>

<screen>

$ ch_track kdt_010.tr kdt_011.tr kdt_012.tr kdt_013.tr -pc longest -o out.tr

</screen>

</para>

<para>

The argument to -pc can either be longest, in which the output

track is the length of the longest input file, or first in which it

is the length of the first input file.


*/


//@{

//@}


/** @name Extracting channels from multi-channel tracks


The -c option is used to specify channels which should be extracted

from the input.  If the input is a 4 channel track,

</para><para>

<screen>

$ ch_track kdt_m.tr -o a.tr -c "0 2"

</screen>

</para>

<para>

will extract the 0th and 2nd channel (counting starts from 0). The

argument to -c can be either a single number or a list of numbers

(wrapped in quotes).


 */

//@{

//@}


/** @name Extracting of a single region from a track


There are several ways of extracting a region of a track. The

simplest way is by using the start, end, to and from commands to

delimit a sub portion of the input track. For example

</para><para>

<screen>

$ ch_track kdt_010.tr -o small.tr -start 1.45 -end 1.768

</screen>

</para>

<para>

extracts a subtrack starting at 1.45 seconds and extending to 1.768 seconds.

alternatively,

</para><para>

<screen>

$ ch_track kdt_010.tr -o small.tr -from 50 -to 100

</screen>

</para>

<para>

extracts a subtrack starting at 50 frames and extending to 100

frames. Times and frames can be mixed in sub-track extraction. The

output track will have the same number of channels as the input track.


*/

//@{

//@}


/** @name Adding headers and format conversion


It is usually a good idea for all track files to have headers as this

way different files can be handled safely. ch_track provides a means

of adding headers to unheadered files. These files are assumed to

be ascii floats with one channel per line.


The following adds a header to an ascii file.

</para>

<para>

<screen>

$ ch_track kdt_010.atr -o kdt_010.h5.tr -otype est -s 0.01

</screen>

</para>

<para>

ch_track can change the frame shift of a fixed frame file, or convert

a variable frame shift file into a fixed frame shift.  At present this

is done with a very crude resampling technique and hence the output

file may suffer from aliasing distortion.</para><para>


Change to a frame spacing of 0.02 seconds:

</para><para>

<screen>

$ ch_track kdt_010.tr -o kdt_010.tr2 -S 0.02

</screen>

*/

  //@{

  //@}


//@}