Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_multistats.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : July 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Basic Multivariate Statistical Functions */
37 /* */
38 /*=======================================================================*/
39 #include <cmath>
40 #include "EST_multistats.h"
41 
42 float mean(EST_FVector &v)
43 {
44  int i;
45  float u = 0.0;
46 
47  for (i = 0; i < v.n(); ++i)
48  u += v(i);
49 
50  return u / v.n();
51 }
52 
53 float sum(EST_FVector &v)
54 {
55  int i;
56  float u = 0.0;
57 
58  for (i = 0; i < v.n(); ++i)
59  u += v(i);
60 
61  return u;
62 }
63 
64 EST_FVector mean(EST_FMatrix &m)
65 {
66  EST_FVector u(m.num_columns());
67  int i, j;
68 
69  for (i = 0; i < m.num_columns(); ++i)
70  {
71  u[i] = 0.0;
72  for (j = 0; j < m.num_rows(); ++j)
73  u[i] += m(j, i);
74  u[i] /= m.num_rows();
75  }
76 
77  return u;
78 }
79 
80 EST_FVector sample_variance(EST_FMatrix &m)
81 {
82  EST_FVector v(m.num_columns());
83  EST_FVector u(m.num_columns());
84  int i, j;
85 
86  u = mean(m);
87 
88  for (j = 0; j < m.num_columns(); ++j)
89  {
90  v[j] = 0.0;
91  for (i = 0; i < m.num_rows(); ++i)
92  v[j] += pow(m(i, j) - u(j), float(2.0));
93  v[j] /= m.num_rows() - 1; // sample variance
94  }
95 
96  return v;
97 }
98 
99 EST_FVector sample_stdev(EST_FMatrix &m)
100 {
101  EST_FVector v;
102  v = sample_variance(m);
103  int j;
104 
105  for (j = 0; j < v.n(); ++j)
106  v[j] = sqrt(v(j));
107 
108  return v;
109 }
110 
111 EST_FMatrix sample_covariance(EST_FMatrix &m)
112 {
113  EST_FVector u(m.num_columns());
114  EST_FMatrix c(m.num_columns(), m.num_columns());
115  int i, j, k;
116 
117  u = mean(m);
118 
119  for (j = 0; j < m.num_columns(); ++j)
120  for (k = 0; k < m.num_columns(); ++k)
121  {
122  c(j, k) = 0.0;
123  for (i = 0; i < m.num_rows(); ++i)
124  c(j, k) += (m(i, j) - u(j)) * (m(i, k) - u(k));
125  c(j, k) /= m.num_rows();
126  }
127 
128 
129  return c;
130 }
131 
132 EST_FMatrix sample_correlation(EST_FMatrix &m)
133 {
134  EST_FMatrix r(m.num_columns(), m.num_columns());
135 
136  EST_FVector s = sample_stdev(m);
137  EST_FMatrix c = sample_covariance(m);
138 
139  int j, k;
140 
141  for (j = 0; j < m.num_columns(); ++j)
142  for (k = 0; k < m.num_columns(); ++k)
143  r(j, k) = c(j, k)/(s(j) * s(k));
144 
145  return r;
146 }
147 
148 EST_FMatrix euclidean_distance(EST_FMatrix &m)
149 {
150  // nowt yet
151  return m;
152 }
153 
154 // normalise matrix m, by subtracting vector "sub" from each column and
155 // then dividing each column by vector div;
156 
157 EST_FMatrix normalise(EST_FMatrix &m, EST_FVector &sub, EST_FVector &div)
158 {
159  int i, j;
160  EST_FMatrix z(m.num_rows(), m.num_columns());
161 
162  for (j = 0; j < m.num_columns(); ++j)
163  for (i = 0; i < m.num_rows(); ++i)
164  z(i, j) = (m(i, j) - sub(j)) / div(j);
165 
166  return z;
167 }
168 
169 // calculate penrose distance for matrix of means gu of p variables
170 // (columns) in g populations (columns), and variance gv.
171 
172 EST_FMatrix penrose_distance(EST_FMatrix &gu, EST_FVector &gv)
173 {
174  int i, j, k;
175  int p = gu.num_columns();
176  int n = gu.num_rows();
177  EST_FMatrix P(n, n);
178 
179  cout << "pop mean " << gu;
180 
181  for (i = 0; i < n; ++i)
182  for (j = 0; j < n; ++j)
183  {
184  P(i, j) = 0.0;
185  for (k = 0; k < p; ++k)
186  P(i, j) += pow(gu(i, k) - gu(j, k), float(2.0)) / gv(k);
187  P(i, j) /= p;
188  }
189  return P;
190 }
191 
192 float single_mahal(EST_FMatrix &ui, EST_FMatrix &uj, EST_FMatrix &invv)
193 {
194  float e;
195  EST_FMatrix a, b,c,d;
196 
197  a = ui - uj;
198  transpose(a,b);
199  multiply(b,invv,c);
200  multiply(c,a,d);
201  e = d(0,0);
202  return e;
203 }
204 
205 EST_FMatrix mahalanobis_distance(EST_FMatrix &gu, EST_FMatrix &covar)
206 {
207  int i, j;
208  int n = gu.num_rows();
209  EST_FMatrix P(n, n), invv, ui, uj;
210 
211  inverse(covar,invv); // should be inverse!!
212 
213  for (i = 0; i < n; ++i)
214  for (j = 0; j < n; ++j)
215  {
216  transpose(row(gu, i),ui);
217  transpose(row(gu, j),uj);
218  P(i, j) = single_mahal(ui, uj, invv);
219  }
220 
221  return P;
222 }
223 
224 float simple_penrose(EST_FVector &ui, EST_FVector &uj, EST_FVector &v)
225 {
226  int k;
227  int n = uj.n();
228  float P = 0.0;
229 
230  for (k = 0; k < n; ++k)
231  P += pow(ui(k) - uj(k), float(2.0)) / v(k);
232  P /= n;
233 
234  return P;
235 }
236 
237 EST_FMatrix population_mean(EST_FMatrix *in, int num_pop)
238 {
239  int i, k;
240  EST_FMatrix pmean(num_pop, in[0].num_columns());
241  EST_FVector u(in[0].num_columns());
242 
243  for (i = 0; i < num_pop; ++i)
244  {
245  u = mean(in[i]);
246  for (k =0; k < in[i].num_columns(); ++k)
247  pmean(i, k) = u(k);
248  }
249  return pmean;
250 }
251 
252 EST_FMatrix add_populations(EST_FMatrix *in, int num_pop)
253 {
254  int i, j, k, l, n;
255 
256  l = 0;
257  for (i = 0; i < num_pop; ++i)
258  l += in[i].num_rows();
259  n = in[0].num_columns();
260 
261  EST_FMatrix msum(l, n);
262 
263  for (k = l = 0; k < num_pop; ++k)
264  for (j =0; j < n; ++j, ++l)
265  for (i = 0; i < in[i].num_rows(); ++i)
266  msum(l, j) = in[k](i, j);
267 
268  return msum;
269 }
270