Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_cluster.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : July 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Event RFC labelling */
37 /* */
38 /*=======================================================================*/
39 
40 #include <cstdlib>
41 #include "EST_system.h"
42 #include "EST_FMatrix.h"
43 #include "EST_cluster.h"
44 #include <fstream>
45 #include "EST_string_aux.h"
46 #include <cfloat>
47 
48 int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
49 int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
50 int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d);
51 float lval(EST_FMatrix &a, float floor, int &row, int &col);
52 float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method);
53 
54 void init_cluster(EST_CBK &cbk, int n)
55 {
56  int i;
57  EST_TList<int> tmp;
58 
59  for (i = 0; i < n; ++i)
60  {
61  tmp.clear();
62  tmp.append(i);
63  cbk.append(tmp);
64  }
65 }
66 
67 int cluster(EST_FMatrix &m, EST_CBK &cbk, EST_TList<EST_String> &ans, EST_String method,
68  EST_TList<EST_String> &names)
69 {
70  float dist;
71  while (cbk.length() > 1)
72  {
73  dist = nn_cluster3(m, cbk, method);
74  ans.append(print_codebook(cbk, dist, names));
75  }
76  return 0;
77 }
78 
79 // return true if list l contains integer n
80 int contains(EST_TList<int> &l, int n)
81 {
82  EST_Litem *p;
83 
84  for (p = l.head(); p != 0; p = p->next())
85  if (l(p) == n)
86  return 1;
87 
88  return 0;
89 }
90 
91 void remove_distances(EST_FMatrix &d , EST_TList<int> &group)
92 {
93  EST_Litem *pi, *pj;
94  int i, j;
95 
96  for (i = 0, pi = group.head(); pi != 0; pi = pi->next(), ++i)
97  for (j = 0, pj = group.head(); pj != 0; pj = pj->next(), ++j)
98  d(group(pi), group(pj)) = 0.0;
99 }
100 
101 /*EST_FMatrix remove_line(Fmatrix a, int r)
102 {
103  int n;
104  n = a.num_rows() - 1;
105 
106  Fmatrix b(n, n);
107 
108  int i, j;
109 
110  for (i = i2 = 0; i < n; ++i. ++i2)
111  for (j = j2 = 0; j < n; ++j, ++j2)
112  if (i == r)
113  ;
114 
115 */
116 
117 void collapse(EST_FMatrix &d, EST_CBK &cbk, int row, int col)
118 {
119  EST_Litem *pi, *pj;
120 
121  for (pi = cbk.head(); pi != 0; pi = pi->next())
122  if (contains(cbk(pi), row))
123  break;
124 
125  for (pj = cbk.head(); pj != 0; pj = pj->next())
126  if (contains(cbk(pj), col))
127  break;
128 
129  cbk(pi) += cbk(pj);
130  remove_distances(d, cbk(pi));
131  cbk.remove(pj);
132 }
133 
134 float min(float a, float b)
135 {
136  return (a < b) ? a: b;
137 }
138 
139 float max(float a, float b)
140 {
141  return (a > b) ? a: b;
142 }
143 
144 // combine codebook groups "row" and "col" into one group and calculate a
145 // new distance matrix
146 void collapse3(EST_FMatrix &d, EST_CBK &cbk, int row, int col, EST_String method)
147 {
148  int i;
149  EST_Litem *pi;
150  EST_TList<int> v;
151  float fm;
152 
153  cout << "Removing row/column " << col << endl;
154  cout << "row " <<cbk.nth(row) << endl;
155  cout << "col " <<cbk.nth(col) << endl;
156  cbk.nth(row) += cbk.nth(col);
157  cout << "row " <<cbk.nth(row) << endl;
158 
159  for (i = 0; i < d.num_rows(); ++i)
160  {
161  if ((i != row) && (i != col))
162  v.append(i);
163  }
164 
165  cout << "row " << row << " col " << col << " left out " << v;
166 
167  for (pi = v.head(); pi != 0; pi = pi->next())
168  {
169  if (method == "nearest")
170  fm = min(d(row,v(pi)),d(col,v(pi)));
171  else if (method == "furthest")
172  fm = max(d(row,v(pi)),d(col,v(pi)));
173  else
174  fm = min(d(row,v(pi)),d(col,v(pi)));
175 
176  cout << "writing values to " << v(pi) << ", " << row << " min "
177  << fm << endl;
178  d(v(pi), row) = fm;
179  d(row, v(pi)) = fm;
180  }
181 
182  d = sub(d, col, col);
183  cbk.remove_nth(col);
184 }
185 
186 int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d)
187 {
188  static float smallest = 0.0;
189  int row=0, col=0;
190  (void)d;
191 
192 // Change so that all values aprt from lowest in codebook get set to
193 // Nan (or whatever)
194 
195  smallest = lval(m, smallest, row, col);
196  cout << "smallest = " << smallest << endl;
197  cout << "row = " << row << " col " << col << endl;
198  collapse(m, cbk, row, col);
199 
200  for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
201  cout << cbk(p);
202  cout << "New matrix\n" << m;
203 
204  // cout << cbk;
205  return 1;
206 }
207 
208 float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method)
209 {
210  static float smallest = 0.0;
211  int row=0, col=0;
212 
213 // Change so that all values aprt from lowest in codebook get set to
214 // Nan (or whatever)
215 
216  cout << "analysing matrix\n" << m;
217  smallest = lval(m, smallest, row, col);
218  cout << "smallest = " << smallest << endl;
219  cout << "row = " << row << " col " << col << endl;
220  collapse3(m, cbk, row, col, method);
221 
222  for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
223  cout << cbk(p);
224  cout << "New matrix\n" << m << endl << endl;
225 
226  // cout << cbk;
227  return smallest;
228 }
229 
230 int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
231 {
232  int i;
233  EST_Litem *pi, *pj;
234  float smallest;
235  int c = 0;
236 
237  i = 0;
238  for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
239  {
240  for (pj = pi->next(); pj != 0; pj = pj->next())
241  {
242  smallest = lowestval(m, cbk(pj), cbk(pi));
243  if (smallest < d)
244  {
245  cbk(pi) += cbk(pj);
246  cbk(pj).clear();
247  }
248  }
249  }
250 
251  for (pi = cbk.head(); pi != 0; pi = pi->next())
252  {
253  if (cbk(pi).empty())
254  {
255  cout << "Empty entry\n";
256  pi = cbk.remove(pi);
257  c = 1;
258  }
259  else
260  cout << cbk(pi);
261  }
262  return c;
263 }
264 
265 int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
266 {
267  int i;
268  EST_Litem *pi, *pj;
269  float smallest;
270  int c = 0;
271 
272  i = 0;
273  for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
274  {
275  for (pj = pi->next(); pj != 0; pj = pj->next())
276  {
277  smallest = highestval(m, cbk(pj), cbk(pi));
278  if (smallest < d)
279  {
280  cbk(pi) += cbk(pj);
281  cbk(pj).clear();
282  }
283  }
284  }
285 
286  for (pi = cbk.head(); pi != 0; pi = pi->next())
287  {
288  if (cbk(pi).empty())
289  {
290  cout << "Empty entry\n";
291  pi = cbk.remove(pi);
292  c = 1;
293  }
294  else
295  cout << cbk(pi);
296  }
297  return c;
298 }
299 
300 
301 static int sorttest(const void *a, const void *b)
302 { // for use with qsort C library function.
303  float *c = (float *)a;
304  float *d = (float *)b;
305  float res = (*c - *d);
306  if (res == 0.0)
307  return 0;
308  return (res < 0.0) ? -1 : 1;
309 }
310 
311 EST_FVector sort_matrix(EST_FMatrix &m)
312 {
313  int i, j, k;
314  float *v;
315  int n_vals;
316 
317  // determine size of triangular part of matrix, excluding diagonal
318  int size = m.num_rows() - 1;
319 
320  n_vals = 0;
321  for (i = 0; i < size; ++i)
322  n_vals+=(i + 1);
323 
324  cout<<"number of values in EST_FMatrix:" << n_vals << " size " << size << endl;
325 
326  v = new float[n_vals];
327 
328  for (i = k = 0; i < m.num_rows(); ++i)
329  for (j = i + 1; j < m.num_columns(); ++j, ++k)
330  {
331  cout << i << " " << j << " " << k << " " << (i * size) + k << endl;
332  v[k] = m(j, i);
333  }
334 
335  for (i = 0; i < n_vals; ++i)
336  cout << "v[" << i << "] = " << v[i] << endl;
337 
338  qsort(v, n_vals, sizeof(float), sorttest);
339 
340  EST_FVector vsort(n_vals);
341  for (i = 0; i < n_vals; ++i)
342  vsort[i] = v[i];
343 
344  return vsort;
345 }
346 
347 EST_String print_codebook(EST_CBK &cbk, float d, EST_TList<EST_String> &names)
348 {
349  EST_Litem *pi;
350  EST_Litem *pj;
351  EST_String s;
352 
353  s = ftoString(d) + " ";
354  for (pi = cbk.head(); pi != 0; pi = pi->next())
355  {
356  s += "(";
357  for (pj = cbk(pi).head(); pj != 0; pj = pj->next())
358  {
359  if (names.empty())
360  s += itoString(cbk.item(pi).item(pj));
361  else
362  s += names.nth(cbk.item(pi).item(pj));
363  if (pj->next() != 0)
364  s += " ";
365  }
366  s += ") ";
367  }
368  return s;
369 }
370 
371 void cluster3(EST_FMatrix &m, float d)
372 {
373  int n = m.num_rows();
374  EST_TList<int> oldcbk[12];
375 
376  int i, j;
377  float smallest;
378 
379  for (i = 0; i < n; ++i)
380  oldcbk[i].append(i);
381 
382  for (i = 0; i < n; ++i)
383  cout << "n: " << i << " " << oldcbk[i] << endl;
384 
385 
386  for (i = 0; i < n; ++i)
387  for (j = i + 1; j < n; ++j)
388  {
389  smallest = lowestval(m, oldcbk[j], oldcbk[i]);
390  cout << "smallest = " << smallest << " d= " << d << endl << endl;
391  if (smallest < d)
392  {
393  cout << "merging " << i << " " << j << endl << endl;
394  merge(oldcbk, i, j);
395  n--;
396  }
397  }
398 
399  for (i = 0; i < n; ++i)
400  cout << "n: " << i << " " << oldcbk[i] << endl;
401 }
402 /*
403  int cluster2(EST_FMatrix &dist, float d)
404  {
405  int n = dist.num_frames();
406  EST_TList<int> oldcbk[12];
407  EST_TList<int> newcbk[12];
408  float sortval = {2.0, 3.0, 4.0, 5.0, 6.0};
409  int i, j, n, m;
410  EST_Litem *p;
411 
412  for (i = 0; i < n; ++i)
413  oldcbk[i].append(i);
414 
415  i = 0;
416  while (n > 2)
417  {
418  s = findval(dist, m, n, sortval[i++]);
419  merge9u
420  }
421 
422  }
423 
424  float findval(EST_FMatrix &dist, int &n, int &m, float val)
425  {
426  int i, j;
427 
428  for (i = 0; i < m.num_frames(); ++i)
429  for (j = 0; j < m.order(); ++j)
430  if ((m.x[i][j] < (val + 0.001)) && (m.x[i][j] > (val - 0.001)))
431  return;
432 
433  cerr << "Couldn't find value " << val << endl;
434  }
435  */
436 float lowestval(EST_FMatrix &m, EST_TList<int> &a, EST_TList<int> &b)
437 {
438  EST_Litem *pa, *pb;
439  float lowest = 100000.0;
440 
441  cout << "list a:" << a << "list b:" << b;
442 
443  for (pa = a.head(); pa != 0; pa = pa->next())
444  for (pb = b.head(); pb != 0; pb = pb->next())
445  {
446  // cout << "m:" << a(pa) << " " << b(pb) << " " << m.x[a(pa)][b(pb)] << endl;
447  if (m(a(pa), b(pb)) < lowest)
448  lowest = m(a(pa), b(pb));
449  }
450  // cout << "lowest " << lowest << endl;
451  return lowest;
452 }
453 
454 // find the lowest value in matrix a above floor, and return it. Also
455 // set row and column to be the indices of it.
456 float lval(EST_FMatrix &a, float floor, int &row, int &col)
457 {
458  int i, j;
459  float lowest = FLT_MAX;
460 
461  for (i = 0; i < a.num_rows(); ++i)
462  for (j = 0; j < a.num_rows(); ++j)
463  if ((a(i, j) < lowest) && (a(i, j) > floor))
464  {
465  lowest = a(i, j);
466  row = i;
467  col = j;
468  }
469  return lowest;
470 }
471 
472 float highestval(EST_FMatrix &m, EST_TList<int> &a, EST_TList<int> &b)
473 {
474  EST_Litem *pa, *pb;
475  float h = 0.0;
476 
477  cout << "list a:" << a << "list b:" << b;
478 
479  for (pa = a.head(); pa != 0; pa = pa->next())
480  for (pb = b.head(); pb != 0; pb = pb->next())
481  {
482  if (m(a(pa), b(pb)) > h)
483  h = m(a(pa), b(pb));
484  }
485  // cout << "lowest " << lowest << endl;
486  return h;
487 }
488 /*
489  float nearest(EST_FMatrix &m, EST_TList<int> &cbk)
490  {
491  EST_Litem *p;
492  float lowest = 100000.0;
493 
494  for (p = cbk.head(); p != 0; p = p->next())
495  {
496  cout << "cbk(p) " << cbk(p) << endl;
497  if (cbk(p) < lowest)
498  lowest = cbk(p);
499  }
500 
501  cout << "lowest = " << lowest << endl;
502  return lowest;
503  }
504  */
505 void merge(EST_TList<int> cbk[], int i, int j)
506 {
507  EST_Litem *p;
508 
509  for (p = cbk[j].head(); p != 0; p = p->next())
510  cbk[i].append(cbk[j].item(p));
511 
512  cbk[j].clear();
513 }
514 
515 int load_names(EST_String file, EST_TList<EST_String> &names)
516 {
517  char inbuf[1000];
518  EST_String tmpstr;
519 
520  ifstream inf(file);
521  if (!inf) cerr << "Can't open names file " << file << endl;
522 
523  while(inf.getline(inbuf, 1000))
524  {
525  tmpstr = inbuf;
526  names.append(tmpstr);
527  }
528  return 0;
529 }
530 
531 /*int merge(EST_TList<int> &a, EST_TList<int> &b)
532  {
533  EST_TList<int> newgroup;
534  EST_Litem *p;
535 
536  for (p = cbk[j].head(); p != 0; p = p->next())
537  cbk[i].append(cbk[j].item(p));
538 
539  cbk[j].clear();
540  }
541  */
542