Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_SCFG.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : October 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A class for representing Stochastic Context Free Grammars */
38 /* */
39 /*=======================================================================*/
40 #include <iostream>
41 #include "EST_Pathname.h"
42 #include "EST_SCFG.h"
43 
44 EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int m)
45 {
46  set_rule(prob,p,m);
47 }
48 
49 EST_SCFG_Rule::EST_SCFG_Rule(double prob,int p, int q, int r)
50 {
51  set_rule(prob,p,q,r);
52 }
53 
54 void EST_SCFG_Rule::set_rule(double prob,int p, int m)
55 {
56  p_prob = prob;
57  p_mother = p;
58  p_daughter1 = m;
59  p_type = est_scfg_unary_rule;
60 }
61 
62 void EST_SCFG_Rule::set_rule(double prob,int p, int q, int r)
63 {
64  p_prob = prob;
65  p_mother = p;
66  p_daughter1 = q;
67  p_daughter2 = r;
68  p_type = est_scfg_binary_rule;
69 }
70 
71 EST_SCFG::EST_SCFG()
72 {
73  p_prob_B=0;
74  p_prob_U=0;
75 }
76 
77 EST_SCFG::EST_SCFG(LISP rs)
78 {
79  p_prob_B=0;
80  p_prob_U=0;
81  set_rules(rs);
82 }
83 
84 EST_SCFG::~EST_SCFG(void)
85 {
86 
87  delete_rule_prob_cache();
88 
89 }
90 
// NOTE(review): the signature line (listing line 91) is missing from this
// extract. Judging by the call in set_rules() and the names used below, this
// is presumably EST_SCFG::find_terms_nonterms(nt, t, rs) -- confirm against
// the original file.
//
// Walks the rule list rs and collects symbol names, skipping any name that
// is already in the target list:
//   - the second element of every rule is added to nt;
//   - for a rule of length 3 (unary) the third element is added to t;
//   - for any other rule the third and fourth elements are added to nt.
// No length or type validation is done here.
92 {
93  // Accumulate the nonterminals and terminals
94  LISP r;
95 
96  for (r=rs; r != NIL; r=cdr(r))
97  {
98  LISP p = car(cdr(car(r)));
99  if (!strlist_member(nt,get_c_string(p)))
100  nt.append(get_c_string(p));
101  if (siod_llength(car(r)) == 3) // unary rule
102  {
103  LISP d = car(cdr(cdr(car(r))));
104  if (!strlist_member(t,get_c_string(d)))
105  t.append(get_c_string(d));
106  }
107  else // binary rules
108  {
109  LISP d1 = car(cdr(cdr(car(r))));
110  LISP d2 = car(cdr(cdr(cdr(car(r)))));
111  if (!strlist_member(nt,get_c_string(d1)))
112  nt.append(get_c_string(d1));
113  if (!strlist_member(nt,get_c_string(d2)))
114  nt.append(get_c_string(d2));
115  }
116  }
117 
118 }
119 
120 void EST_SCFG::set_rules(LISP lrules)
121 {
122  // Initialise rule base from Lisp form
123  LISP r;
124  EST_StrList nt_list, term_list;
125 
126  rules.clear();
127  delete_rule_prob_cache();
128 
129  find_terms_nonterms(nt_list,term_list,lrules);
130  nonterminals.init(nt_list);
131  terminals.init(term_list);
132 
133  if (!consp(car(cdr(car(lrules)))))
134  p_distinguished_symbol =
135  nonterminal(get_c_string(car(cdr(car(lrules)))));
136  else
137  cerr << "SCFG: no distinguished symbol" << endl;
138 
139  for (r=lrules; r != NIL; r=cdr(r))
140  {
141  if ((siod_llength(car(r)) < 3) ||
142  (siod_llength(car(r)) > 4) ||
143  (!numberp(car(car(r)))))
144  cerr << "SCFG rule is malformed" << endl;
145 // est_error("SCFG rule is malformed\n");
146  else
147  {
148  EST_SCFG_Rule rule;
149  if (siod_llength(car(r)) == 3)
150  {
151  int m = nonterminal(get_c_string(car(cdr(car(r)))));
152  int d = terminal(get_c_string(car(cdr(cdr(car(r))))));
153  rule.set_rule(get_c_float(car(car(r))),m,d);
154  }
155  else
156  {
157  int p = nonterminal(get_c_string(car(cdr(car(r)))));
158  int d1=nonterminal(get_c_string(car(cdr(cdr(car(r))))));
159  int d2 = nonterminal(get_c_string(car(cdr(cdr(cdr(car(r)))))));
160  rule.set_rule(get_c_float(car(car(r))),p,d1,d2);
161  }
162  rules.append(rule);
163  }
164  }
165 
166  rule_prob_cache();
167 }
168 
// NOTE(review): the signature line (listing line 169) is missing from this
// extract. save() calls get_rules() and walks its LISP result, so this is
// presumably EST_SCFG::get_rules() -- confirm against the original file.
//
// Builds a Lisp list with one entry per rule in `rules`, in rule order:
// (prob mother daughter) for unary rules and
// (prob mother daughter1 daughter2) for binary rules. Symbol indices are
// passed through nonterminal()/terminal() and then rintern(). Rules of any
// other type contribute nothing.
170 {
171  // Return LISP form of rules
172  EST_Litem *p;
173  LISP r;
174 
175  for (r=NIL,p=rules.head(); p != 0; p=p->next())
176  {
177  if (rules(p).type() == est_scfg_unary_rule)
178  r = cons(cons(flocons(rules(p).prob()),
179  cons(rintern(nonterminal(rules(p).mother())),
180  cons(rintern(terminal(rules(p).daughter1())),NIL))),
181  r);
182  else if (rules(p).type() == est_scfg_binary_rule)
183  r = cons(cons(flocons(rules(p).prob()),
184  cons(rintern(nonterminal(rules(p).mother())),
185  cons(rintern(nonterminal(rules(p).daughter1())),
186  cons(rintern(nonterminal(rules(p).daughter2())),
187  NIL)))),
188  r);
189  }
// Entries were consed onto the front of r, so reverse to restore rule order
190  return reverse(r);
191 }
192 
193 EST_read_status EST_SCFG::load(const EST_String &filename)
194 {
195  LISP rs;
196 
197  rs = vload(filename,1);
198 
199  set_rules(rs);
200 
201  return format_ok;
202 }
203 
204 EST_write_status EST_SCFG::save(const EST_String &filename)
205 {
206  EST_Pathname outfile(filename);
207  FILE *fd;
208  LISP r;
209 
210  if (outfile == "-")
211  fd = stdout;
212  else
213  {
214  if ((fd=fopen(outfile,"w")) == NULL)
215  {
216  cerr << "scfg_train: failed to open file \"" << outfile <<
217  "\" for writing" << endl;
218  return misc_write_error;
219  }
220  }
221 
222  for (r=get_rules(); r != NIL; r=cdr(r))
223  pprint_to_fd(fd,car(r));
224 
225  if (fd != stdout)
226  fclose(fd);
227 
228  return write_ok;
229 }
230 
231 
// Allocates the probability lookup tables and zero-fills them.
// With N = num_nonterminals() and T = num_terminals():
//   p_prob_B is an N x N x N table of doubles,
//   p_prob_U is an N x T table of doubles.
// Existing tables are not freed here, so callers are expected to have called
// delete_rule_prob_cache() first (set_rules() does).
// NOTE(review): the listing numbering jumps from 251 to 253 just before the
// closing brace, so a statement appears to be missing from this extract
// (possibly the call that fills the tables from `rules`) -- confirm against
// the original file.
232 void EST_SCFG::rule_prob_cache()
233 {
234  // Build access cache for the probabilities of binary rules
235  // This will have to be made much more efficient
236  int i,j;
237 
238  p_prob_B = new double**[num_nonterminals()];
239  p_prob_U = new double*[num_nonterminals()];
240  for (i=0; i < num_nonterminals(); i++)
241  {
242  p_prob_B[i] = new double*[num_nonterminals()];
243  p_prob_U[i] = new double[num_terminals()];
244  memset(p_prob_U[i],0,sizeof(double)*num_terminals());
245  for (j=0; j < num_nonterminals(); j++)
246  {
247  p_prob_B[i][j] = new double[num_nonterminals()];
248  memset(p_prob_B[i][j],0,sizeof(double)*num_nonterminals());
249  }
250  }
251 
253 
254 }
255 
// NOTE(review): the signature line (listing line 256) is missing from this
// extract, so the function's name and return type cannot be read here --
// restore it from the original file.
//
// Copies each rule's probability into the lookup tables:
//   binary rules -> p_prob_B[mother][daughter1][daughter2]
//   unary rules  -> p_prob_U[mother][daughter1]
// The tables are indexed without any allocation or bounds check here, so
// they must already exist and be large enough.
257 {
258  EST_Litem *pp;
259 
260  for (pp=rules.head(); pp != 0; pp = pp->next())
261  {
262  if (rules(pp).type() == est_scfg_binary_rule)
263  {
264  int p = rules(pp).mother();
265  int q = rules(pp).daughter1();
266  int r = rules(pp).daughter2();
267  p_prob_B[p][q][r] = rules(pp).prob();
268  }
269  else if (rules(pp).type() == est_scfg_unary_rule)
270  {
271  int p = rules(pp).mother();
272  int m = rules(pp).daughter1();
273  p_prob_U[p][m] = rules(pp).prob();
274  }
275  }
276 }
277 
278 void EST_SCFG::delete_rule_prob_cache()
279 {
280  int i,j;
281 
282  if (p_prob_B == 0)
283  return;
284 
285  for (i=0; i < num_nonterminals(); i++)
286  {
287  for (j=0; j < num_nonterminals(); j++)
288  delete [] p_prob_B[i][j];
289  delete [] p_prob_B[i];
290  delete [] p_prob_U[i];
291  }
292  delete [] p_prob_B;
293  delete [] p_prob_U;
294 
295  p_prob_B = 0;
296  p_prob_U = 0;
297 }
298 
299 ostream &operator << (ostream &s, const EST_SCFG_Rule &rule)
300 {
301  (void)rule;
302  return s << "<<EST_SCFG_Rule>>";
303 }
304 
// List-template boilerplate for EST_SCFG_Rule. The two .cc includes and
// Instantiate_TList are only compiled when INSTANTIATE_TEMPLATES is defined.
// NOTE(review): presumably these macros come from the EST_TList headers and
// set up the list type that holds `rules` -- confirm against those headers.
305 Declare_TList(EST_SCFG_Rule)
306 #if defined(INSTANTIATE_TEMPLATES)
307 #include "../base_class/EST_TList.cc"
308 #include "../base_class/EST_TSortable.cc"
309 
310 Instantiate_TList(EST_SCFG_Rule)
311 #endif
312