Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
wfst_regex.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* WFST functions for building from REGEXs */
38 /* */
39 /*=======================================================================*/
40 #include <iostream>
41 #include "EST_cutils.h"
42 #include "EST_WFST.h"
43 
44 void EST_WFST::build_or_transition(int start, int end, LISP disjunctions)
45 {
46  // Choice of either disjunct
47  LISP l;
48  int intermed;
49 
50  if (disjunctions == NIL)
51  cerr << "WFST construct: disjunct is nil\n";
52 
53  for (l=disjunctions; l != NIL; l=cdr(l))
54  {
55  // Can't go directly to end as other transitions could be added there
56  intermed = add_state(wfst_nonfinal);
57  build_wfst(start,intermed,car(l));
58  build_wfst(intermed,end,epsilon_label());
59  }
60 }
61 
62 void EST_WFST::build_and_transition(int start, int end, LISP conjunctions)
63 {
64  // require each conjunct in turn
65  int intermed,lstart;
66  LISP l;
67 
68  if (conjunctions == NIL)
69  cerr << "WFST build: conjunct is nil\n";
70 
71  lstart = start;
72  for (l=conjunctions; cdr(l) != NIL; l=cdr(l))
73  {
74  intermed = add_state(wfst_nonfinal);
75  build_wfst(lstart,intermed,car(l));
76  lstart = intermed;
77  }
78  build_wfst(lstart,end,car(l));
79 
80 }
81 
82 int EST_WFST::terminal(LISP l)
83 {
84  // true, l is a terminal in a regex
85 
86  if (atomp(l))
87  return TRUE;
88  else
89  return FALSE;
90 }
91 
92 int EST_WFST::operator_or(LISP l)
93 {
94  if (l && !consp(l) && (streq("or",get_c_string(l))))
95  return TRUE;
96  else
97  return FALSE;
98 }
99 
100 int EST_WFST::operator_plus(LISP l)
101 {
102  if (l && !consp(l) && (streq("+",get_c_string(l))))
103  return TRUE;
104  else
105  return FALSE;
106 }
107 
108 int EST_WFST::operator_not(LISP l)
109 {
110  if (l && !consp(l) && (streq("not",get_c_string(l))))
111  return TRUE;
112  else
113  return FALSE;
114 }
115 
116 int EST_WFST::operator_star(LISP l)
117 {
118  if (l && !consp(l) && (streq("*",get_c_string(l))))
119  return TRUE;
120  else
121  return FALSE;
122 }
123 
124 int EST_WFST::operator_optional(LISP l)
125 {
126  if (l && !consp(l) && (streq("?",get_c_string(l))))
127  return TRUE;
128  else
129  return FALSE;
130 }
131 
132 int EST_WFST::operator_and(LISP l)
133 {
134  if (l && !consp(l) && (streq("and",get_c_string(l))))
135  return TRUE;
136  else
137  return FALSE;
138 }
139 
140 void EST_WFST::build_wfst(int start, int end,LISP regex)
141 {
142  if (terminal(regex))
143  {
144  // unpack the label
145  int in,out;
146  EST_String s_name(get_c_string(regex));
147  if (s_name.contains("/"))
148  {
149  in = p_in_symbols.name(s_name.before("/"));
150  out = p_out_symbols.name(s_name.after("/"));
151  }
152  else
153  {
154  in = p_in_symbols.name(get_c_string(regex));
155  out = p_out_symbols.name(get_c_string(regex));
156  }
157  if ((in == -1) || (out == -1))
158  cerr << "WFST_build: symbol " << get_c_string(regex) <<
159  " not in alphabet\n";
160  p_states[start]->add_transition(0,end,in,out);
161  }
162  else if (operator_or(car(regex)))
163  build_or_transition(start,end,cdr(regex));
164  else if (operator_plus(car(regex)))
165  {
166  build_wfst(start,end,cdr(regex));
167  build_wfst(end,end,cdr(regex));
168  }
169  else if (operator_star(car(regex)))
170  {
171  build_wfst(start,start,cdr(regex));
172  build_wfst(start,end,epsilon_label());
173  }
174  else if (operator_not(car(regex)))
175  {
176  int errstate = add_state(wfst_error);
177  build_and_transition(start,errstate,cdr(regex));
178  }
179  else if (operator_optional(car(regex)))
180  {
181  build_wfst(start,end,cdr(regex));
182  build_wfst(start,end,epsilon_label());
183  }
184  else if (operator_and(car(regex)))
185  build_and_transition(start,end,cdr(regex));
186  else
187  build_and_transition(start,end,regex); // default is and
188 }
189 
190 void EST_WFST::build_from_regex(LISP inalpha, LISP outalpha, LISP regex)
191 {
192 
193  clear();
194 
195  cout << "building from regex: " << endl;
196  pprint(regex);
197 
198  init(inalpha,outalpha); // alphabets
199  if (regex == NIL)
200  p_start_state = add_state(wfst_final); // empty WFST
201  else
202  {
203  p_start_state = add_state(wfst_nonfinal);
204  int end = add_state(wfst_final);
205  build_wfst(p_start_state,end,regex);
206  }
207 }
208