docs/speech_tools-2.4.0/pda__main_8cc_source.html

/*************************************************************************/

/*                                                                       */

/*                Centre for Speech Technology Research                  */

/*                     University of Edinburgh, UK                       */

/*                         Copyright (c) 1996                            */

/*                        All Rights Reserved.                           */

/*                                                                       */

/*  Permission is hereby granted, free of charge, to use and distribute  */

/*  this software and its documentation without restriction, including   */

/*  without limitation the rights to use, copy, modify, merge, publish,  */

/*  distribute, sublicense, and/or sell copies of this work, and to      */

/*  permit persons to whom this work is furnished to do so, subject to   */

/*  the following conditions:                                            */

/*   1. The code must retain the above copyright notice, this list of    */

/*      conditions and the following disclaimer.                         */

/*   2. Any modifications must be clearly marked as such.                */

/*   3. Original authors' names are not deleted.                         */

/*   4. The authors' names are not used to endorse or promote products   */

/*      derived from this software without specific prior written        */

/*      permission.                                                      */

/*                                                                       */

/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */

/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */

/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */

/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */

/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */

/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */

/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */

/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */

/*  THIS SOFTWARE.                                                       */

/*                                                                       */

/*************************************************************************/

/*                    Author :  Paul Taylor                              */

/*                    Date   :  May 1994                                 */

/*-----------------------------------------------------------------------*/

/*             Pitch Detection Algorithm Main routine                    */

/*                                                                       */

/*=======================================================================*/

#include <fstream>

#include "EST.h"

#include "sigpr/EST_sigpr_utt.h"

#include "EST_cmd_line_options.h"


void set_parameters(EST_Features &a_list, EST_Option &al);


void option_override(EST_Features &op, EST_Option al,

             const EST_String &option, const EST_String &arg);


static int save_pm(EST_String filename, EST_Track fz);


/** @name <command>pda</command> <emphasis>Pitch Detection Algorithm</emphasis>

    @id pda-manual

  * @toc

 */


//@{


/**@name Synopsis

  */

//@{


//@synopsis


/**

pda is a pitch detection algorithm that produces a fundamental frequency

contour from a speech waveform file. At present only the

super resolution pitch determination algorithm is implemented.

See (Medan, Yair, and Chazan, 1991) and (Bagshaw et al., 1993) for a detailed

description of the algorithm.

</para><para>


The default values given below were found to optimise the performance

of the pitch determination algorithm for speech data sampled at 20kHz

using a 16\-bit waveform and low pass filter with a 600Hz cut-off

frequency and more than \-85dB rejection above 700Hz. The best

performance is obtained when the [\-p] flag is passed.  </para><para>

*/


//@}


/**@name Options

  */

//@{


//@options


//@}


int main (int argc, char *argv[])

{

    EST_Track fz;

    EST_Wave sig;

    EST_Option al;

    EST_Features op;

    EST_String out_file("-");

    EST_StrList files;


    parse_command_line

    (argc, argv,

       EST_String("[input file] -o [output file] [options]\n")+

       "Summary: pitch track waveform files\n"

       "use \"-\" to make input and output files stdin/out\n"

       "-h               Options help\n\n"+

       options_wave_input()+

       options_pda_general()+

       options_pda_srpd()+

       options_track_output(),

            files, al);


    default_pda_options(op);

    set_parameters(op, al);


    if (read_wave(sig, files.first(), al) != format_ok)

    exit(-1);


    out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";


    pda(sig, fz, op);       // do f0 tracking


    if (al.present("-pm"))

    save_pm(out_file, fz);

    else

    fz.save(out_file, op.S("f0_file_type", "0"));


    if (al.present("-diff"))

    {

    fz = differentiate(fz);

    fz.save(out_file + ".diff", op.S("f0_file_type", "0"));

    }

    return 0;

}


// Transfer the command line settings held in `al` into the feature set
// `op` that is later handed to pda().
//
// "srpd_resize" is always set to 1.  Each feature/flag pair in the table
// below is then passed to option_override() in table order
// (option_override is defined elsewhere; presumably it copies the flag's
// value into the feature only when the flag was given -- confirm there).
// Finally "do_low_pass" is set to "true" when -L reads as "true" and to
// "false" when -R reads as "true"; -R is tested last, so it wins when
// both are given.
//
// NOTE(review): "-R" is read both as the value for
// "v2uv_coef_thresh_ratio" and as the switch that turns low pass
// filtering off.  An earlier, since removed, version of this routine
// read the ratio from "-r".  Confirm against the option table which
// spelling is intended.
void set_parameters(EST_Features &op, EST_Option &al)
{
    // { feature name, command line flag }
    static const char * const override_table[][2] =
    {
	// general options
	{ "pda_frame_shift",        "-shift"  },
	{ "pda_frame_length",       "-length" },
	{ "max_pitch",              "-fmax"   },
	{ "min_pitch",              "-fmin"   },

	// low pass filtering options
	{ "lpf_cutoff",             "-u"      },
	{ "lpf_order",              "-forder" },

	// srpd options
	{ "decimation",             "-d"      },
	{ "noise_floor",            "-n"      },
	{ "min_v2uv_coef_thresh",   "-m"      },
	{ "v2uv_coef_thresh_ratio", "-R"      },
	{ "v2uv_coef_thresh",       "-H"      },
	{ "anti_doubling_thresh",   "-t"      },
	{ "peak_tracking",          "-P"      },

	// file types
	{ "f0_file_type",           "-otype"  },
	{ "wave_file_type",         "-itype"  }
    };
    const int table_size = sizeof(override_table) / sizeof(override_table[0]);

    op.set("srpd_resize", 1);

    for (int row = 0; row < table_size; ++row)
    {
	option_override(op, al, override_table[row][0], override_table[row][1]);
    }

    if (al.val("-L", 0) == "true")
    {
	op.set("do_low_pass", "true");
    }

    if (al.val("-R", 0) == "true")
    {
	op.set("do_low_pass", "false");
    }
}


/*    a_list.override_val("sample_rate", al.val("-f", 0));

    a_list.override_val("min_pitch",  al.val("-fmin", 0));

    a_list.override_val("max_pitch",  al.val("-fmax", 0));

    a_list.override_val("pda_frame_shift", al.val("-s", 0));

    a_list.override_val("pda_frame_length",al.val("-l", 0));


    // low pass filtering options.

    a_list.override_val("lpf_cutoff",al.val("-u", 0));

    a_list.override_val("lpf_order",al.val("-forder", 0));


    //srpd options

    a_list.override_val("decimation", al.val("-d", 0));

    a_list.override_val("noise_floor",   al.val("-n", 0));

    a_list.override_val("min_v2uv_coef_thresh", al.val("-m", 0));

    a_list.override_val("v2uv_coef_thresh_ratio", al.val("-r", 0));

    a_list.override_val("v2uv_coef_thresh", al.val("-H", 0));

    a_list.override_val("anti_doubling_thresh", al.val("-t", 0));

    a_list.override_val("peak_tracking", al.val("-P", 0));

    if (al.val("-L", 0) == "true")

    a_list.override_val("do_low_pass", "true");

    if (al.val("-R", 0) == "true")

    a_list.override_val("do_low_pass", "false");

    a_list.override_val("f0_file_type", al.val("-otype", 0));

    a_list.override_val("wave_file_type", al.val("-itype", 0));

*/


// Write the track `fz` to `filename` as a list of pitch period end
// times, in milliseconds, preceded by a fixed text header.
//
// `filename` "-" selects standard output.  For every frame where
// fz.val(i) is true and the frame value fz.a(i) is positive, one period
// of length 1/fz.a(i) is appended to a running position and the new
// position is written.  After one or more skipped frames the running
// position restarts from the time of the next written frame, fz.t(i).
//
// Returns 0 on success and -1 when the file cannot be opened or when
// the stream reports an error after writing.
static int save_pm(EST_String filename, EST_Track fz)
{
    // The file stream lives on the stack so that it is closed and
    // released on every return path; the previous heap allocated stream
    // was leaked when the open failed.
    ofstream file_stream;
    ostream *outf = &cout;
    float position, period;

    if (!(filename == "-"))
    {
	file_stream.open(filename);
	outf = &file_stream;
    }

    if (!(*outf))
    {
	cerr << "save_pm: can't write to file \"" << filename << "\"" << endl;
	return -1;
    }

    *outf << "XAO1\n\n";    // xmg header identifier.
    *outf << "LineType        bars \n";
    *outf << "LineStyle       solid \n";
    *outf << "LineWidth       0 \n";
    *outf << "Freq 16\n";
    *outf << "Format  Binary \n";
    *outf << char(12) << "\n";  // control L character

    position = 0.0;
    int gap = 0;
    for (int i = 0; i < fz.num_frames(); ++i)
    {
	// A frame value of zero would make the period infinite and
	// poison every later position, so such frames are treated as a
	// gap, exactly like frames for which fz.val(i) is false.
	if (fz.val(i) && fz.a(i) > 0)
	{
	    if (gap)
	    {
		// Resynchronise with the track after skipped frames.
		position = fz.t(i);
		gap = 0;
	    }
	    period = 1.0 / fz.a(i);
	    *outf << (position + period) * 1000.0 << endl;
	    position += period;
	}
	else
	    gap = 1;
    }

    // Report failures that happened after the stream was opened
    // (for example a full disk) instead of returning success.
    outf->flush();
    if (!(*outf))
    {
	cerr << "save_pm: error while writing to file \"" << filename << "\"" << endl;
	return -1;
    }

    return 0;
}


/**@name Examples


Pitch detection on typical male voice, using low pass filtering:

<screen>

$ pda kdt_010.wav -o kdt_010.f0 -fmin 80 -fmax 200 -L

</screen>

*/

//@{


//@}

//@}