Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
rgcompile.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A Regular grammar compiler, its pretty free about the grammar */
38 /* Actually it will take full context free grammars and convert them */
39 /* up to a specified rewrite depth */
40 /* */
41 /* Based loosely on "Finite State Machines from Features Grammars" by */
42 /* Black, International Workshop of Parsing Technologies, CMU 89 */
43 /* */
44 /*=======================================================================*/
45 #include <iostream>
46 #include "EST_cutils.h"
47 #include "EST_WFST.h"
48 
49 static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms);
50 static LISP rg_find_nt_ts(LISP rules,LISP sets);
51 static LISP prod_join(LISP n, LISP p);
52 static int production_index(LISP state,
54  int proposed);
55 
56 void rgcompile(LISP rg, EST_WFST &all_wfst)
57 {
58  // Build a transducer from given regular grammar.
59  LISP nt_ts,nonterms,terms,rewrites;
60  LISP sets=siod_nth(2,rg);
61  LISP rules=siod_nth(3,rg);
62 
63  nt_ts = rg_find_nt_ts(rules,sets);
64  nonterms = car(nt_ts);
65  terms = cdr(nt_ts);
66 
67  rewrites = find_rewrites(rules,terms,nonterms);
68 
69  if (rewrites == NIL)
70  return; // left recursive or no rules
71 
72  all_wfst.build_from_rg(terms,terms,
73  car(car(rules)), // distinguished symbol
74  rewrites,
75  sets,terms,25);
76 
77 }
78 
79 static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms)
80 {
81  // Find the full rewrites of each nonterminal until a terminal
82  // appears as the first item
83  LISP nt,r;
84  LISP rewrites = NIL;
85  (void)terms;
86 
87  // got lazy and haven't done this recursively yet ...
88  for (nt=nonterms; nt != NIL; nt=cdr(nt))
89  {
90  LISP nn = NIL;
91  for (r=rules; r != NIL; r=cdr(r))
92  if (car(car(r)) == car(nt)) // depend on symbols being eq
93  nn = cons(cdr(cdr(car(r))),nn);
94  rewrites = cons(cons(car(nt),nn),rewrites);
95  }
96 
97  return rewrites;
98 }
99 
100 static LISP rg_find_nt_ts(LISP rules,LISP sets)
101 {
102  // Find the alphabets used in the rules
103  LISP terms=NIL,nonterms=NIL,r,s,set,t;
104 
105  for (r=rules; r != NIL; r=cdr(r))
106  if (!siod_member_str(get_c_string(car(car(r))),nonterms))
107  nonterms = cons(car(car(r)),nonterms);
108 
109  for (r=rules; r != NIL; r=cdr(r))
110  for (s=cdr(cdr(car(r))); s != NIL; s=cdr(s))
111  if ((!siod_member_str(get_c_string(car(s)),terms)) &&
112  (!siod_member_str(get_c_string(car(s)),nonterms)) &&
113  (!siod_assoc_str(get_c_string(car(s)),sets)))
114  terms = cons(car(s),terms);
115  else if ((set=siod_assoc_str(get_c_string(car(s)),sets)))
116  {
117  for (t=cdr(set); t != 0; t=cdr(t))
118  if (!siod_member_str(get_c_string(car(t)),terms))
119  terms = cons(car(t),terms);
120  }
121 
122  return cons(nonterms,terms);
123 }
124 
125 
126 void EST_WFST::build_from_rg(LISP inalpha, LISP outalpha,
127  LISP distinguished, LISP rewrites,
128  LISP sets, LISP terms,
129  int max_depth)
130 {
131  // This is sort of similar to determinising in that the "state"
132  // is represented by a list of numbers, i.e. the remainder of
133  // of production
134  LISP current, start_state, remainder, set, new_prod;
135  int ns, current_state;
136  const char *current_sym;
137  LISP agenda = NIL;
138  EST_WFST_MultiStateIndex index(100);
139  (void)max_depth;
140  int c=0;
141 
142  clear();
143  init(inalpha,outalpha);
144  int i_epsilon = in_epsilon();
145  int o_epsilon = out_epsilon();
146 
147  // Create a starting state and add it to this WFST
148  p_start_state = add_state(wfst_nonfinal);
149  start_state = cons(flocons((double)p_start_state),
150  cons(distinguished,NIL));
151 
152  production_index(start_state,index,p_start_state);
153 
154  agenda = cons(start_state,agenda); // initialize agenda
155 
156  while (agenda != NIL)
157  {
158  current = car(agenda);
159  agenda = cdr(agenda);
160  current_state = get_c_int(car(current));
161  current_sym = get_c_string(car(cdr(current)));
162  remainder = cdr(cdr(current));
163  if ((c % 1000)== 0)
164  cout << summary() << " Agenda " << siod_llength(agenda) << endl;
165  c++;
166 
167  if ((set=siod_assoc_str(current_sym,sets)))
168  {
169  ns = production_index(remainder,index,p_num_states);
170  // add transitions for each member of set
171  for (LISP s=cdr(set); s != NIL; s=cdr(s))
172  p_states[current_state]
173  ->add_transition(0.0, // no weights
174  ns,
175  p_in_symbols.name(get_c_string(car(s))),
176  p_out_symbols.name(get_c_string(car(s))));
177  if (remainder == NIL)
178  add_state(wfst_final);
179  else if (ns == p_num_states) // its a new remainder
180  {
181  add_state(wfst_nonfinal);
182  agenda = cons(cons(flocons(ns),remainder),agenda);
183  }
184  }
185  else if (siod_member_str(current_sym,terms))
186  {
187  ns = production_index(remainder,index,p_num_states);
188  // Add transition for this terminal symbol
189  p_states[current_state]
190  ->add_transition(0.0, // no weights
191  ns,
192  p_in_symbols.name(current_sym),
193  p_out_symbols.name(current_sym));
194  if (remainder == NIL)
195  add_state(wfst_final);
196  else if (ns == p_num_states) // its a new remainder
197  {
198  add_state(wfst_nonfinal);
199  agenda = cons(cons(flocons(ns),remainder),agenda);
200  }
201  }
202  else // its a non-terminal so simply rewrite
203  {
204  for (LISP p=cdr(siod_assoc_str(current_sym,rewrites));
205  p != NIL;
206  p=cdr(p))
207  {
208  new_prod = prod_join(car(p),remainder);
209  ns = production_index(new_prod,index,p_num_states);
210  p_states[current_state]
211  ->add_transition(0.0, // no weights
212  ns,i_epsilon,o_epsilon);
213  if (ns == p_num_states) // its a new remainder
214  {
215  if (new_prod == NIL)
216  add_state(wfst_final);
217  else
218  {
219  add_state(wfst_nonfinal);
220  agenda = cons(cons(flocons(ns),new_prod),agenda);
221  }
222  }
223  }
224  }
225  }
226 }
227 
228 static int production_index(LISP state,
230  int proposed)
231 {
232  // Returns proposed if ms is not already in index, otherwise
233  // returns the value that was proposed when it was first new.
234 
235  // I'll have to make this more efficient in future.
236  EST_String istring("");
237  LISP p;
238 
239  for (p=state; p != NIL; p = cdr(p))
240  istring += EST_String(get_c_string(car(p))) + " ";
241 
242  int ns,found;
243 
244  for (p=state; p != NIL; p = cdr(p))
245  istring += EST_String(get_c_string(car(p))) + " ";
246 
247  ns = index.val(istring,found);
248  if (found)
249  return ns;
250  else
251  {
252  index.add_item(istring,proposed);
253  return proposed;
254  }
255 
256 }
257 
258 static LISP prod_join(LISP n, LISP p)
259 {
260  if (n == NIL)
261  return p;
262  else
263  return cons(car(n),prod_join(cdr(n),p));
264 }