Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
spectrogram.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1994,1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : December 96 */
35 /*-----------------------------------------------------------------------*/
36 /* Spectrogram Generation */
37 /* */
38 /*=======================================================================*/
39 #include <cmath>
40 #include <climits>
41 #include <cfloat> /* needed for FLT_MAX */
42 #include "EST_error.h"
43 #include "EST_Track.h"
44 #include "EST_Wave.h"
45 #include "sigpr/EST_Window.h"
46 #include "EST_Option.h"
47 #include "sigpr/EST_fft.h"
48 #include "sigpr/EST_spectrogram.h"
49 #include "sigpr/EST_misc_sigpr.h"
50 
51 
52 void make_spectrogram(EST_Wave &sig, EST_Track &sp, EST_Features &op)
53 {
54  EST_Wave psig;
55 
56  EST_pre_emphasis(sig, psig, op.F("preemph"));
57 
58  // calculate raw spectrogram
59  raw_spectrogram(sp, psig, op.F("frame_length"), op.F("frame_shift"),
60  op.I("frame_order"), op.present("slow_fft"));
61 
62  if (op.present("raw"))
63  {
64  cout << "no scaling\n";
65  return;
66  }
67  // coerce the values so as to emphasis important features
68 
69  if (op.present("sp_range") || op.present("sp_wcut") || op.present("sp_bcut"))
70  {
71  if (!op.present("sp_range"))
72  op.set("sp_range", 1.0);
73 
74  if (!op.present("sp_wcut"))
75  op.set("sp_wcut", 1.0);
76 
77  if (!op.present("sp_bcut"))
78  op.set("sp_bcut", 0.0);
79  scale_spectrogram(sp, op.F("sp_range"),op.F("sp_wcut"),op.F("sp_bcut"));
80  }
81 }
82 
83 void scale_spectrogram(EST_Track &sp, float range, float wcut, float bcut)
84 {
85  float max, min, scale, v;
86  int i, j;
87 
88  max = -FLT_MIN;
89  min = FLT_MAX;
90 
91  // find min and max values
92  for (i = 0; i < sp.num_frames(); ++i)
93  for (j = 0; j < sp.num_channels(); ++j)
94  {
95  float vv = sp.a_no_check(i, j);
96 
97  if (vv > max)
98  max = vv;
99  if (vv < min)
100  min = vv;
101  }
102  scale = (max - min);
103 
104  // for every value:
105  // 1. Effectively scale in range 0 to 1
106  // 2. Impose white and black cut offs
107  // 3. Rescale to 0 and 1
108  // 4. scale to fit in "range"
109  // this can obviously be done more efficiently
110 
111  float mag = (float)range / (float)(bcut - wcut);
112  for (i = 0; i < sp.num_frames(); ++i)
113  for (j = 0; j < sp.num_channels(); ++j)
114  {
115  v = (((sp.a_no_check(i, j) - min) / scale) - wcut) * mag;
116  if (v > range) v = range;
117  if (v < 0.0) v = 0.0;
118  sp.a_no_check(i, j) = v;
119  }
120 }
121 
 // Compute an unscaled spectrogram of `sig` into `sp`.
 //
 // sp     - resized here to num_frames x order/2 and filled frame by frame
 // sig    - waveform to analyse; its sample rate converts `length` and
 //          `shift` into sample counts
 // length - analysis frame length; multiplied by the sample rate, so
 //          presumably in seconds - confirm
 // shift  - spacing between frames, same unit as `length`; also passed to
 //          sp.fill_time() at the end
 // order  - size of the real/imag work vectors; frame_length is capped
 //          to this value and sp gets order/2 channels
 // slow   - selects power_spectrum_slow() instead of power_spectrum()
 //
 // NOTE(review): a `shift` that rounds to 0 samples makes the num_frames
 // computation divide by zero; nothing here guards against it.
122 void raw_spectrogram(EST_Track &sp, EST_Wave &sig,
123  float length,
124  float shift,
125  int order,
126  bool slow)
127 {
 // convert to whole samples, rounding to nearest via the +0.5
128  int frame_length = (int) (length * (float) sig.sample_rate() +0.5);
129  int frame_shift = (int) (shift * (float) sig.sample_rate() +0.5);
130 
 // the window shape is always requested by the name "hamming"
131  EST_WindowFunc *make_window = EST_Window::creator("hamming");
132 
133  // sanity check, we can't analyse more signal than order allows.
134  if (frame_length > order)
135  {
136  EST_warning("frame_length reduced to %f (%d samples) to fit order\n",
137  (float)order/(float) sig.sample_rate(), order);
138  frame_length=order;
139  }
140 
141  // enough frames to cover the entire signal
142  int num_frames= (int)ceil(sig.num_samples()/(float)frame_shift);
143 
144  // spectrogram gets order/2 powers, the moduli of order/2
145  // complex numbers
146  sp.resize(num_frames, order/2, FALSE);
147 
 // work vectors handed to the power spectrum routines for every frame
148  EST_FVector real(order);
149  EST_FVector imag(order);
150 
151  // create the window shape
152  EST_TBuffer<float> window_vals(frame_length);
153  make_window(frame_length, window_vals,-1);
154 
155  for (int k = 0 ; k < num_frames ; k++)
156  {
 // the window is centred on pos, so it starts half a frame earlier
 // (window_start is negative for the first frame(s))
157  int pos = frame_shift * k;
158  int window_start = pos - frame_length/2;
159 
160  real.empty();
161 
162  // imag not used in old FFT code
163  if (slow)
164  imag.empty();
 // NOTE(review): the listing's own numbering jumps from 164 to 167 here,
 // so the head of the call that the arguments below belong to is not
 // visible.  Presumably it is the EST_Window routine that applies
 // window_vals to sig and writes the frame into `real` - confirm against
 // the upstream file before building from this text.
167  window_vals,
168  window_start,
169  frame_length,
170  real, FALSE);
171 
 // a non-zero return is treated as failure for this frame
172  int state = slow?power_spectrum_slow(real, imag):power_spectrum(real, imag);
173  if (state != 0)
174  {
 // report on stderr and zero this frame's channels; later frames
 // are still processed
175  fprintf(stderr, "FFT Failed for frame %d\n", k);
176  for (int i = 0; i < order /2; ++i)
177  sp.a_no_check(k, i) = 0;
178  }
179  else
180  sp.copy_frame_in(k, real);
181  }
 // hand the frame spacing to the track's fill_time()
182  sp.fill_time(shift);
183 }
184