Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
rateconv.cc
1 /*
2  * $Id: rateconv.cc,v 1.5 2014/04/07 15:32:10 robert Exp $
3  *
4  * RATECONV.C
5  *
6  * Convert sampling rate stdin to stdout
7  *
8  * Copyright (c) 1992, 1995 by Markus Mummert
9  *
10  *****************************************************************************
11  * MODIFIED BY Alan W Black (awb@cstr.ed.ac.uk)
12  * Make it compilable under C++
13  * and integrate into Edinburgh Speech Tools (i.e. no longer
14  * reads from stdin / writes to stdout)
15  * Removed interface functions
16  * ansified function calls
17  * made it work in floats rather than ints
18  * I got the original from a random linux site, the original
19  * author's email is <mum@mmk.e-technik.tu-muenchen.de>
20  *****************************************************************************
21  *
22  * Redistribution and use of this software, modification and inclusion
23  * into other forms of software are permitted provided that the following
24  * conditions are met:
25  *
26  * 1. Redistributions of this software must retain the above copyright
27  * notice, this list of conditions and the following disclaimer.
28  * 2. If this software is redistributed in a modified condition
29  * it must reveal clearly that it has been modified.
30  *
31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
32  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
33  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
34  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
35  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
36  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
37  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
38  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
39  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
41  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
42  * DAMAGE.
43  *
44  *
45  * history: 2.9.92 begin coding
46  * 5.9.92 fully operational
47  * 14.2.95 provide BIG_ENDIAN, SWAPPED_BYTES_DEFAULT
48  * switches, Copyright note and References
49  * 25.11.95 changed XXX_ENDIAN to I_AM_XXX_ENDIAN
50  * default gain set to 0.8
51  * 3.12.95 stereo implementation
52  * SWAPPED_BYTES_DEFAULT now HBYTE1ST_DEFAULT
53  * changed [L/2] to (L-1)/2 for exact symmetry
54  *
55  *
56  * IMPLEMENTATION NOTES
57  *
58  * Converting is achieved by interpolating the input samples in
59  * order to evaluate the represented continuous input slope at
60  * sample instances of the new rate (resampling). It is implemented
61  * as a polyphase FIR-filtering process (see reference). The rate
62  * conversion factor is determined by a rational factor. Its
63  * nominator and denominator are integers of almost arbitrary
64  * value, limited only by coefficient memory size.
65  *
66  * General rate conversion formula:
67  *
68  * out(n*Tout) = SUM in(m*Tin) * g((n*d/u-m)*Tin) * Tin
69  * over all m
70  *
71  * FIR-based rate conversion formula for polyphase processing:
72  *
73  * L-1
74  * out(n*Tout) = SUM in(A(i,n)*Tin) * g(B(i,n)*Tin) * Tin
75  * i=0
76  *
77  * A(i,n) = i - (L-1)/2 + [n*d/u]
78  * = i - (L-1)/2 + [(n%u)*d/u] + [n/u]*d
79  * B(i,n) = n*d/u - [n*d/u] + (L-1)/2 - i
80  * = ((n%u)*d/u)%1 + (L-1)/2 - i
81  * Tout = Tin * d/u
82  *
83  * where:
84  * n,i running integers
85  * out(t) output signal sampled at t=n*Tout
86  * in(t) input signal sampled in intervals Tin
87  * u,d up- and downsampling factor, integers
88  * g(t) interpolating function
89  * L FIR-length of realized g(t), integer
90  * / float-division-operator
91  * % float-modulo-operator
92  * [] integer-operator
93  *
94  * note:
95  * (L-1)/2 in A(i,n) can be omitted as pure time shift yielding
96  * a causal design with a delay of ((L-1)/2)*Tin.
97  * n%u is a cyclic modulo-u counter clocked by out-rate
98  * [n/u]*d is a d-increment counter, advanced when n%u resets
99  * B(i,n)*Tin can take on L*u different values, at which g(t)
100  * has to be sampled as a coefficient array
101  *
102  * Interpolation function design:
103  *
104  * The interpolation function design is based on a sinc-function
105  * windowed by a gaussian function. The former determines the
106  * cutoff frequency, the latter limits the necessary FIR-length by
107  * pushing the outer skirts of the resulting impulse response below
108  * a certain threshold fast enough. The drawback is a smoothed
109  * cutoff inducing some aliasing. Due to the symmetry of g(t) the
110  * group delay of the filtering process is constant (linear phase).
111  *
112  * g(t) = 2*fgK*sinc(pi*2*fgK*t) * exp(-pi*(2*fgG*t)**2)
113  *
114  * where:
115  * fgK cutoff frequency of sinc function in f-domain
116  * fgG key frequency of gaussian window in f-domain
117  * reflecting the 6.82dB-down point
118  *
119  * note:
120  * Taking fsin=1/Tin as the input sampling frequency, it turns out
121  * that in conjunction with L, u and d only the ratios fgK/(fsin/2)
122  * and fgG/(fsin/2) specify the whole process. Requiring fsin, fgK
123  * and fgG as input purposely keeps the notion of absolute
124  * frequencies.
125  *
126  * Numerical design:
127  *
128  * Samples are expected to be 16bit-signed integers, alternating
129  * left and right channel in case of stereo mode- The byte order
130  * per sample is selectable. FIR-filtering is implemented using
131  * 32bit-signed integer arithmetic. Coefficients are scaled to
132  * find the output sample in the high word of accumulated FIR-sum.
133  *
134  * Interpolation can lead to sample magnitudes exceeding the
135  * input maximum. Worst case is a full scale step function on the
136  * input. In this case the sinc-function exhibits an overshoot of
137  * 2*9=18percent (Gibb's phenomenon). In any case sample overflow
138  * can be avoided by a gain of 0.8.
139  *
140  * If u=d=1 and if the input stream contains only a single sample,
141  * the whole length of the FIR-filter will be written to the output.
142  * In general the resulting output signal will be (L-1)*fsout/fsin
143  * samples longer than the input signal. The effect is that a
144  * finite input sequence is viewed as padded with zeros before the
145  * `beginning' and after the `end'.
146  *
147  * The output lags ((L-1)/2)*Tin behind to implement g(t) as a
148  * causal system corresponding to a causal relationship of the
149  * discrete-time sequences in(m/fsin) and out(n/fsout) with
150  * respect to a sequence time origin at t=n*Tin=m*Tout=0.
151  *
152  *
153  * REFERENCES
154  *
155  * Crochiere, R. E., Rabiner, L. R.: "Multirate Digital Signal
156  * Processing", Prentice-Hall, Englewood Cliffs, New Jersey, 1983
157  *
158  * Zwicker, E., Fastl, H.: "Psychoacoustics - Facts and Models",
159  * Springer-Verlag, Berlin, Heidelberg, New-York, Tokyo, 1990
160  */
161 
162 #include <cmath>
163 #include <cstdio>
164 #include <fcntl.h>
165 #include <cstring>
166 #include "rateconv.h"
167 
168 /*
169  * adaptable defines and globals
170  */
171 #define BYTE char /* signed or unsigned */
172 #define WORD short /* signed or unsigned, fit two BYTEs */
173 #define LONG int /* signed, fit two WORDs */
174 
175 #ifndef MAXUP
176 #define MAXUP 0x400 /* MAXUP*MAXLENGTH worst case malloc */
177 #endif
178 
179 #ifndef MAXLENGTH
180 #define MAXLENGTH 0x400 /* max FIR length */
181 #endif
182  /* accounts for mono samples, means */
183 #define OUTBUFFSIZE (2*MAXLENGTH) /* fit >=MAXLENGHT stereo samples */
184 #define INBUFFSIZE (4*MAXLENGTH) /* fit >=2*MAXLENGTH stereo samples */
185 #define sqr(a) ((a)*(a))
186 
187 #ifndef M_PI
188 #define M_PI 3.14159265358979
189 #endif
190 
191 /* AWB deleted previous byte swap globals, byteswap is done external to */
192 /* this function */
193 
194 #ifdef STEREO_DEFAULT
195 static int g_monoflag = 0;
196 #else
197 static int g_monoflag = -1;
198 #endif
199 
200 /*
201  * other globals
202  */
203 static double g_ampli = 0.8; /* default gain, don't change */
204 static int
205 /* g_infilehandle = 0, */ /* stdin */
206 /* g_outfilehandle = 1, */ /* stdout */
207  g_firlen, /* FIR-length */
208  g_up, /* upsampling factor */
209  g_down /* downsampling factor */
210 ;
211 
212 static float
213  g_sin[INBUFFSIZE], /* input buffer */
214  g_sout[OUTBUFFSIZE], /* output buffer */
215  *g_coep; /* coefficient array pointer */
216 
217 static double
218  g_fsi, /* input sampling frequency */
219  g_fgk, /* sinc-filter cutoff frequency */
220  g_fgg /* gaussian window key frequency */
221 ; /* (6.8dB down freq. in f-domain) */
222 
223 
224 /*
225  * evaluate sinc(x) = sin(x)/x safely
226  */
227 static double sinc(double x)
228 {
229  return(fabs(x) < 1E-50 ? 1.0 : sin(fmod(x,2*M_PI))/x);
230 }
231 
232 /*
233  * evaluate interpolation function g(t) at t
234  * integral of g(t) over all times is expected to be one
235  */
236 static double interpol_func(double t,double fgk,double fgg)
237 {
238  return (2*fgk*sinc(M_PI*2*fgk*t)*exp(-M_PI*sqr(2*fgg*t)));
239 }
240 
241 /*
242  * evaluate coefficient from i, q=n%u by sampling interpolation function
243  * and scale it for integer multiplication used by FIR-filtering
244  */
245 static float coefficient(int i,int q,int firlen,double fgk,double fgg,
246  double fsi,int up,int down,double amp)
247 {
248  float val;
249  double d;
250 
251  d = interpol_func((fmod(q*down/(double)up,1.) + (firlen-1)/2. - i) / fsi,
252  fgk,
253  fgg);
254  val = amp * d/fsi;
255  return val;
256 }
257 
258 /*
259  * transfer n floats from s to d
260  */
261 static void transfer_int(float *s,float *d,int n)
262 {
263  memmove(d,s,sizeof(float)*n);
264 }
265 
266 /*
267  * zerofill n floats from s
268  */
269 static void zerofill(float *s,int n)
270 {
271  memset(s,0,n*(sizeof(float)));
272 }
273 
274 /*
275  * FIR-routines, mono and stereo
276  * this is where we need all the MIPS
277  */
278 void fir_mono(float *inp,float *coep,int firlen,float *outp)
279 {
280  float akku = 0, *endp;
281  int n1 = (firlen / 8) * 8, n0 = firlen % 8;
282 
283  endp = coep + n1;
284  while (coep != endp) {
285  akku += *inp++ * *coep++;
286  akku += *inp++ * *coep++;
287  akku += *inp++ * *coep++;
288  akku += *inp++ * *coep++;
289  akku += *inp++ * *coep++;
290  akku += *inp++ * *coep++;
291  akku += *inp++ * *coep++;
292  akku += *inp++ * *coep++;
293  }
294 
295  endp = coep + n0;
296  while (coep != endp) {
297  akku += *inp++ * *coep++;
298  }
299 
300  *outp = akku;
301 }
302 
303 static void fir_stereo(float *inp,float *coep,int firlen,float *out1p,float *out2p)
304 {
305  float akku1 = 0, akku2 = 0, *endp;
306  int n1 = (firlen / 8) * 8, n0 = firlen % 8;
307 
308  endp = coep + n1;
309  while (coep != endp) {
310  akku1 += *inp++ * *coep;
311  akku2 += *inp++ * *coep++;
312  akku1 += *inp++ * *coep;
313  akku2 += *inp++ * *coep++;
314  akku1 += *inp++ * *coep;
315  akku2 += *inp++ * *coep++;
316  akku1 += *inp++ * *coep;
317  akku2 += *inp++ * *coep++;
318  akku1 += *inp++ * *coep;
319  akku2 += *inp++ * *coep++;
320  akku1 += *inp++ * *coep;
321  akku2 += *inp++ * *coep++;
322  akku1 += *inp++ * *coep;
323  akku2 += *inp++ * *coep++;
324  akku1 += *inp++ * *coep;
325  akku2 += *inp++ * *coep++;
326  }
327 
328  endp = coep + n0;
329  while (coep != endp) {
330  akku1 += *inp++ * *coep;
331  akku2 += *inp++ * *coep++;
332  }
333  *out1p = akku1;
334  *out2p = akku2;
335 }
336 
337 /*
338  * filtering from input buffer to output buffer;
339  * returns number of processed samples in output buffer:
340  * if it is not equal to output buffer size,
341  * the input buffer is expected to be refilled upon entry, so that
342  * the last firlen numbers of the old input buffer are
343  * the first firlen numbers of the new input buffer;
344  * if it is equal to output buffer size, the output buffer
345  * is full and is expected to be stowed away;
346  *
347  */
348 static int inbaseidx = 0, inoffset = 0, cycctr = 0, outidx = 0;
349 
350 static int filtering_on_buffers
351  (float *inp,int insize,float *outp, int outsize,
352  float *coep,int firlen,int up,int down,int monoflag)
353 {
354 
355  if (monoflag) {
356  while (-1) {
357  inoffset = (cycctr * down)/up;
358  if ((inbaseidx + inoffset + firlen) > insize) {
359  inbaseidx -= insize - firlen + 1;
360  return(outidx);
361  }
362  fir_mono(inp + inoffset + inbaseidx,
363  coep + cycctr * firlen,
364  firlen, outp + outidx++);
365  cycctr++;
366  if (!(cycctr %= up))
367  inbaseidx += down;
368  if (!(outidx %= outsize))
369  return(outsize);
370  }
371  }
372  else {
373  /*
374  * rule how to convert mono routine to stereo routine:
375  * firlen, up, down and cycctr relate to samples in general,
376  * wether mono or stereo; inbaseidx, inoffset and outidx as
377  * well as insize and outsize still account for mono samples.
378  */
379  while (-1) {
380  inoffset = 2*((cycctr * down)/up);
381  if ((inbaseidx + inoffset + 2*firlen) > insize) {
382  inbaseidx -= insize - 2*firlen + 2;
383  return(outidx);
384  }
385 /* order?
386  fir_stereo(inp + inoffset + inbaseidx,
387  coep + cycctr * firlen, firlen,
388  outp + outidx++, outp + outidx++);
389 
390 */
391  fir_stereo(inp + inoffset + inbaseidx,
392  coep + cycctr * firlen, firlen,
393  outp + outidx, outp + outidx+1);
394  outidx += 2;
395 
396  cycctr++;
397  if (!(cycctr %= up))
398  inbaseidx += 2*down;
399  if (!(outidx %= outsize))
400  return(outsize);
401  }
402  }
403 }
404 
405 /*
406  * set up coefficient array
407  */
408 static void make_coe(void)
409 {
410  int i, q;
411 
412  for (i = 0; i < g_firlen; i++) {
413  for (q = 0; q < g_up; q++) {
414  g_coep[q * g_firlen + i] = coefficient(i, q, g_firlen,
415  g_fgk, g_fgg, g_fsi, g_up, g_down, g_ampli);
416  }
417  }
418 }
419 
420 /***********************************************************************/
421 /* Serious modifications by Alan W Black (awb@cstr.ed.ac.uk) */
422 /* to interface with rest of system // deleted various io functions */
423 /* too. */
424 /***********************************************************************/
425 static WORD *inbuff = NULL;
426 static int inpos;
427 static int inmax;
428 static WORD *outbuff = NULL;
429 static int outpos;
430 static int outmax;
431 
432 static int ioerr(void)
433 {
434  delete g_coep;
435  return -1;
436 }
437 
438 static int gcd(int x, int y)
439 {
440  int remainder,a,b;
441 
442  if ((x < 1) || (y < 1))
443  return -1;
444 
445  for (a=x,b=y; b != 0; )
446  {
447  remainder = a % b;
448  a = b;
449  b = remainder;
450  }
451  return a;
452 }
453 
454 static int find_ratios(int in_samp_freq,int out_samp_freq,int *up,int *down)
455 {
456  // Find ratios
457  int d;
458 
459  d = gcd(in_samp_freq,out_samp_freq);
460  if (d == -1) return -1;
461  *down = in_samp_freq / d;
462  *up = out_samp_freq / d;
463 
464  if ((*up > 1024) || (*down > 1024))
465  return -1; // should try harder
466 
467  return 0;
468 }
469 
470 static int intimport(float *buff, int n)
471 {
472  /* Import n more samples from PWave into buff */
473  int i,end;
474 
475  if ((inpos+n) >= inmax)
476  end = inmax - inpos;
477  else
478  end = n;
479  for (i=0;i < end; i++)
480  buff[i] = inbuff[inpos++];
481 
482  return i;
483 }
484 
485 static int intexport(float *buff, int n)
486 {
487  /* Export n samples from buff into end of PWave */
488  int i,end;
489 
490  if ((outpos+n) >= outmax)
491  end = outmax - inpos;
492  else
493  end = n;
494  for (i=0;i < end; i++)
495  outbuff[outpos++] = (short)buff[i];
496 
497  return i;
498 }
499 
500 static int init_globs(WORD *in,int insize, WORD **out, int *outsize,
501  int in_samp_freq, int out_samp_freq)
502 {
503  int new_size;
504  g_monoflag = 1; /* always mono */
505  if (find_ratios(in_samp_freq,out_samp_freq,&g_up,&g_down) == -1)
506  return -1;
507  g_fsi = 1.0; /* ? in_samp_freq ? */
508  if (g_up > g_down)
509  { // upsampling
510  g_fgg = 0.0116;
511  g_fgk = 0.461;
512  g_firlen = (int)(162 * (float)g_up/(float)g_down);
513  }
514  else
515  { // downsampling
516  g_fgg = (float)g_up/(float)g_down * 0.0116;
517  g_fgk = (float)g_up/(float)g_down * 0.461;
518  g_firlen = (int)(162 * (float)g_down/(float)g_up);
519  }
520  if (g_firlen < 1 || g_firlen > MAXLENGTH)
521  return -1;
522  g_ampli = 0.8;
523  g_coep = new float[g_firlen * g_up];
524 
525  inpos = 0;
526  inmax = insize;
527  inbuff = in;
528  new_size = (int)(((float)out_samp_freq/(float)in_samp_freq)*
529  1.1*insize)+2000;
530  *out = new WORD[new_size];
531  outbuff = *out;
532  outmax = new_size;
533  *outsize = 0;
534  outpos = 0;
535 
536  /* For filter_on_buffers */
537  inbaseidx = 0;
538  inoffset = 0;
539  cycctr = 0;
540  outidx = 0;
541 
542  return 0;
543 }
544 
545 
546 /*
547  * External call added by Alan W Black, 4th June 1996
548  * a combination of parse args and main
549  */
550 int rateconv(short *in,int isize, short **out, int *osize,
551  int in_samp_freq, int out_samp_freq)
552 {
553  int insize = 0, outsize = 0, skirtlen;
554 
555  if (init_globs(in,isize,out,osize,in_samp_freq,out_samp_freq) == -1)
556  return -1;
557 
558  make_coe();
559  skirtlen = (g_firlen - 1) * (g_monoflag ? 1 : 2);
560  zerofill(g_sin, skirtlen);
561  do {
562  insize = intimport(g_sin + skirtlen, INBUFFSIZE - skirtlen);
563  if (insize < 0 || insize > INBUFFSIZE - skirtlen)
564  return ioerr();
565  do {
566  outsize = filtering_on_buffers(g_sin, skirtlen + insize,
567  g_sout, OUTBUFFSIZE,
568  g_coep, g_firlen, g_up, g_down,
569  g_monoflag);
570  if (outsize != OUTBUFFSIZE) {
571  transfer_int(g_sin + insize, g_sin, skirtlen);
572  break;
573  }
574  if (intexport(g_sout, outsize) != outsize)
575  return ioerr();
576  } while (-1);
577  } while (insize > 0);
578  zerofill(g_sin + skirtlen, skirtlen);
579  do {
580  outsize = filtering_on_buffers(g_sin, skirtlen + skirtlen,
581  g_sout, OUTBUFFSIZE,
582  g_coep, g_firlen, g_up, g_down,
583  g_monoflag);
584  if (intexport(g_sout, outsize) != outsize)
585  return ioerr();
586  } while (outsize == OUTBUFFSIZE);
587 
588  delete g_coep;
589 
590  *osize = outpos;
591 
592  /* The new signal will be offset by half firlen window so fix it */
593  memmove(*out,*out+g_firlen/4,*osize*2);
594  *osize -= g_firlen/4;
595 
596  return 0;
597 
598 }
599