// docs/speech_tools-2.4.0/pitchmark_8cc_source.html

/*************************************************************************/

/*                                                                       */

/*                Centre for Speech Technology Research                  */

/*                     University of Edinburgh, UK                       */

/*                         Copyright (c) 1996                            */

/*                        All Rights Reserved.                           */

/*                                                                       */

/*  Permission is hereby granted, free of charge, to use and distribute  */

/*  this software and its documentation without restriction, including   */

/*  without limitation the rights to use, copy, modify, merge, publish,  */

/*  distribute, sublicense, and/or sell copies of this work, and to      */

/*  permit persons to whom this work is furnished to do so, subject to   */

/*  the following conditions:                                            */

/*   1. The code must retain the above copyright notice, this list of    */

/*      conditions and the following disclaimer.                         */

/*   2. Any modifications must be clearly marked as such.                */

/*   3. Original authors' names are not deleted.                         */

/*   4. The authors' names are not used to endorse or promote products   */

/*      derived from this software without specific prior written        */

/*      permission.                                                      */

/*                                                                       */

/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */

/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */

/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */

/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */

/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */

/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */

/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */

/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */

/*  THIS SOFTWARE.                                                       */

/*                                                                       */

/*************************************************************************/

/*                 Author: Paul Taylor                                   */

/*                 Date   :  December 1997                               */

/*-----------------------------------------------------------------------*/

/*                   Pitchmark Laryngograph Signals                      */

/*                                                                       */

/*=======================================================================*/


/* Note - this is based on a pitchmarker developed by Mike Macon and

written in matlab.

*/


#include "stdlib.h"
#include <vector>
#include "sigpr/EST_filter.h"
#include "sigpr/EST_pitchmark.h"
#include "ling_class/EST_Relation.h"
#include "EST_math.h"
#include "EST_inline_utils.h"
#include "EST_wave_aux.h"
#include "EST_track_aux.h"


// Forward declaration of delta(); its definition is not in this part of the
// file. pitchmark() calls it as delta(lx, lxdiff, 4) to fill lxdiff from lx.
void delta(EST_Wave &tr, EST_Wave &d, int regression_length);


/** Derive a pitchmark track from a laryngograph (lx) waveform.

    Processing order: FIR low-pass then FIR high-pass on lx, a delta of
    the filtered wave with regression length 4, an optional FIR low-pass
    and an optional mean smooth on that delta, and finally a pick of the
    negative-going zero crossings of the result.

    @param lx     laryngograph wave; it is handed by non-const reference
                  to the filters and saved afterwards as the "filtered"
                  wave, so the caller's wave is presumably modified
    @param lx_lf  frequency argument of the low-pass filter applied to lx
    @param lx_lo  order argument of the low-pass filter applied to lx
    @param lx_hf  frequency argument of the high-pass filter applied to lx
    @param lx_ho  order argument of the high-pass filter applied to lx
    @param df_lf  frequency argument of the low-pass filter applied to
                  the delta wave
    @param df_lo  order argument of that filter; the filter is skipped
                  unless this is greater than 0
    @param mo     order passed to simple_mean_smooth(); smoothing is
                  skipped unless this is greater than 0
    @param debug  when non-zero, progress lines are written to cout and
                  the intermediate waves are saved as tmpfilt.lx,
                  tmpdiff.lx and tmpfiltdiff.lx
    @return       the picked pitchmarks, in a track marked as not
                  equally spaced
*/
EST_Track pitchmark(EST_Wave &lx, int lx_lf, int lx_lo, int lx_hf,
                    int lx_ho, int df_lf, int df_lo, int mo, int debug)
{
    const bool verbose = (debug != 0);
    EST_Track marks;
    EST_Wave slope;

    marks.set_equal_space(false);

    if (verbose)
        cout << "pitchmark 1\n";

    // Pre-filtering: band-limit the laryngograph signal.
    FIRlowpass_double_filter(lx, lx_lf, lx_lo);
    FIRhighpass_double_filter(lx, lx_hf, lx_ho);

    if (verbose)
    {
        cout << "pitchmark 2\n";
        lx.save("tmpfilt.lx");
    }

    // The delta wave gets the same length and sample rate as the input.
    slope.resize(lx.num_samples());
    slope.set_sample_rate(lx.sample_rate());
    delta(lx, slope, 4);

    if (verbose)
        slope.save("tmpdiff.lx");

    // Optional clean-up of the delta wave before the crossings are picked.
    if (df_lo > 0)
        FIRlowpass_double_filter(slope, df_lf, df_lo);
    if (mo > 0)
        simple_mean_smooth(slope, mo);

    if (verbose)
        slope.save("tmpfiltdiff.lx");

    neg_zero_cross_pick(slope, marks);
    return marks;
}


/** Pitchmark a laryngograph wave using settings read from a feature set.

    Each setting is read with op.I() when the named feature is present,
    otherwise the listed default is used:

      lx_low_frequency   400      lx_low_order    19
      lx_high_frequency   40      lx_high_order   19
      df_low_frequency  1000      df_low_order     0
      median_order        19

    Debugging is switched on whenever a feature called "pm_debug" is
    present; its value is not looked at.

    @param lx  laryngograph wave, forwarded to the explicit-parameter
               pitchmark() overload
    @param op  feature set holding the optional settings above
    @return    whatever the explicit-parameter overload returns
*/
EST_Track pitchmark(EST_Wave &lx, EST_Features &op)
{
    // Filter applied to the laryngograph signal: low-pass part.
    const int lx_lf = op.present("lx_low_frequency") ?
        op.I("lx_low_frequency") : 400;
    const int lx_lo = op.present("lx_low_order") ?
        op.I("lx_low_order") : 19;

    // Filter applied to the laryngograph signal: high-pass part.
    const int lx_hf = op.present("lx_high_frequency") ?
        op.I("lx_high_frequency") : 40;
    const int lx_ho = op.present("lx_high_order") ?
        op.I("lx_high_order") : 19;

    // Low-pass filter applied to the delta signal (order 0 disables it).
    const int df_lf = op.present("df_low_frequency") ?
        op.I("df_low_frequency") : 1000;
    const int df_lo = op.present("df_low_order") ?
        op.I("df_low_order") : 0;

    // Smoothing order for the delta signal.
    const int mo = op.present("median_order") ?
        op.I("median_order") : 19;

    const int debug = op.present("pm_debug") ? 1 : 0;

    return pitchmark(lx, lx_lf, lx_lo, lx_hf, lx_ho, df_lf, df_lo,
                     mo, debug);
}


/** Remove frames that follow their predecessor too closely.

    The track is compacted in place. After a frame has been kept, the
    frames directly after it are skipped for as long as the gap between
    consecutive frames, pm.t(i + 1) - pm.t(i), stays below min; the frame
    that ends such a run is dropped too and scanning resumes with the one
    after it. Only the pm.t() entries are rewritten here; the track is
    then resized to the number of frames kept, with its channel count
    unchanged.

    @param pm   track of pitchmark times, modified in place
    @param min  smallest gap allowed between neighbouring frames, in
                seconds
*/
void pm_min_check(EST_Track &pm, float min)
{
    const int n = pm.num_frames();
    int i, j;

    for (i = j = 0; i < n - 1; ++i, ++j)
    {
        pm.t(j) = pm.t(i);
        while ((i < (n - 1)) && ((pm.t(i + 1) - pm.t(i)) < min))
            ++i;
    }

    // The loop stops before the final frame. If that frame was not
    // swallowed by a run of close frames it has to be kept, and it has to
    // be counted: without the increment the resize below would throw away
    // the value just written (and would empty a one-frame track).
    if (i < n)
    {
        pm.t(j) = pm.t(i);
        ++j;
    }

    pm.resize(j, pm.num_channels());
}


void pm_fill(EST_Track &pm, float new_end, float max, float min, float def)

{

    EST_FVector new_pm;


    if (new_end < 0)

    new_end = pm.end();


//    if (debug)

    // cout<< "new end:" << new_end << endl;

    // largest possible set of new pitchmarks


//    cout << "num frames:" << pm.num_frames() << endl;

//    cout << "num frames:" << pm.end() << endl;

//    cout << "num frames:" << min << endl;

    new_pm.resize(int(new_end / min));

//    cout << "num frames:" << pm.end()/min << endl;

//    cout << "num frames:" << new_pm.n() << endl;


    int i, j, npm=0;

    float last = 0.0;


    int dropped=0, added=0;


    for(j = 0; j < pm.num_frames(); j++)

    {

    float current = pm.t(j);


    if (current > new_end)

        break;


    if (current - last < min)

    {

        // drop current pitchmark

        dropped++;

    }


    else if (current-last > max)

    {

        // interpolate

        int num = ifloor((current - last)/ def);

        float size = (current-last) / num;

        for (i = 1; i <= num; i++)

        {

        new_pm[npm] = last + i * size;

        npm++;

        added++;

        }

    }

    else

    {

        new_pm[npm] = pm.t(j);

        npm++;

    }

    last=current;

    }


    if (new_end - last > max)

    {

    // interpolate

    int num = ifloor((new_end - last)/ def);

    float size = (new_end -last) / num;

    for (i = 1; i <= num; i++)

    {

        new_pm[npm] = last + i * size;

        npm++;

        added++;

    }

    }


//    if (debug)

//  if (dropped>0 || added >0)

//      cout << "Dropped " << dropped<< " and added " << added << " PMs\n";


//    if (debug)

    pm.resize(npm, pm.num_channels());

    for (i = 0; i < npm; i++)

    pm.t(i) = new_pm(i);

}


/** Place a pitchmark at every negative-going zero crossing of a wave.

    A crossing is counted at sample s when sample s - 1 is greater than
    zero and sample s is zero or below; the mark takes the time lx.t(s).

    @param lx  wave to scan
    @param pm  track receiving the marks: it is first resized to one
               frame per sample as working room, then cut back to the
               number of crossings found, and set_value() is called on
               every remaining frame (presumably flagging each frame as
               holding a value - confirm against EST_Track)
*/
void neg_zero_cross_pick(EST_Wave &lx, EST_Track &pm)
{
    const int n_samples = lx.num_samples();
    int n_marks = 0;

    // There can never be more crossings than samples.
    pm.resize(n_samples, EST_CURRENT);

    for (int s = 1; s < n_samples; ++s)
    {
        const bool was_positive = lx.a(s - 1) > 0;
        if (was_positive && (lx.a(s) <= 0))
        {
            pm.t(n_marks) = lx.t(s);
            ++n_marks;
        }
    }

    pm.resize(n_marks, EST_CURRENT);

    for (int f = 0; f < pm.num_frames(); ++f)
        pm.set_value(f);
}


/** Convert a pitchmark track into a relation of labels.

    The relation is cleared, then one item is appended per frame of the
    track, with an empty "name" feature and an "end" feature set to the
    frame's time.

    @param pm   pitchmark track to read
    @param lab  relation to fill; any previous content is discarded
*/
void pm_to_label(EST_Track &pm, EST_Relation &lab)
{
    lab.clear();

    const int n_marks = pm.num_frames();
    for (int k = 0; k < n_marks; ++k)
    {
        EST_Item *const item = lab.append();
        item->set("name","");
        item->set("end",pm.t(k));
    }
}


/** Turn pitchmarks into an F0 contour sampled at the pitchmarks.

    f0 becomes a copy of pm resized to a single channel. Channel 0 of
    each frame is set to the reciprocal of the time elapsed since the
    previous frame; the first frame is measured from time 0.0.

    @param pm  pitchmark track to read
    @param f0  track overwritten with the result
*/
void pm_to_f0(EST_Track &pm, EST_Track &f0)
{
    f0 = pm;
    f0.resize(EST_ALL, 1);

    float previous = 0.0;
    const int n = f0.num_frames();

    for (int k = 0; k < n; ++k)
    {
        const float here = f0.t(k);
        f0.a(k, 0) = 1.0 / (here - previous);
        previous = here;
    }
}


/** Turn pitchmarks into an F0 contour with a fixed frame shift.

    fz is resized to (int)(pm.end() / shift) frames of one channel and
    its times are filled in with fill_time(shift). For each output frame
    the pitchmark found by pm.index_below() at that frame's time is
    looked up, and the reciprocal of get_time_frame_size() for that
    pitchmark is stored.

    NOTE(review): the loop stops one short of fz.num_frames(), so the
    last output frame is never assigned here - confirm this is intended.

    @param pm     pitchmark track to read
    @param fz     track overwritten with the result
    @param shift  spacing of the output frames, in the units of pm.t()
*/
void pm_to_f0(EST_Track &pm, EST_Track &fz, float shift)
{
    fz.resize(static_cast<int>(pm.end() / shift), 1);
    fz.fill_time(shift);

    const int stop = fz.num_frames() - 1;
    for (int k = 0; k < stop; ++k)
    {
        const float period =
            get_time_frame_size(pm, pm.index_below(fz.t(k)));
        fz.a(k) = 1.0 / period;
    }
}