Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
scfg_parse_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : October 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Parse a list of sentences with a given stochastic context free */
37 /* grammar */
38 /* */
39 /*=======================================================================*/
40 #include <cstdlib>
41 #include <cstdio>
42 #include <iostream>
43 #include <fstream>
44 #include <cstring>
45 #include "EST.h"
46 #include "EST_SCFG.h"
47 #include "EST_SCFG_Chart.h"
48 #include "siod.h"
49 
// Output path selected with -o ("-" when the option is absent); it is
// assigned in scfg_parse_main.
50 static EST_String outfile = "-";
51 
// Forward declaration; the definition follows main() below.
52 static int scfg_parse_main(int argc, char **argv);
53 
54 
55 /** @name <command>scfg_parse</command> <emphasis>Parse text using a pre-trained stochastic context free grammar</emphasis>
56  @id scfg-parse-manual
57  * @toc
58  */
59 
60 //@{
61 
62 
63 /**@name Synopsis
64  */
65 //@{
66 
67 //@synopsis
68 
69 /**
70 
71 This parses given text with a given stochastic context free grammar.
72 Note this program is not designed as an arbitrary parser for
73 unrestricted English. It simply parses the input non-terminals
74 with the given grammar. If you want English (or other language)
75 parses, consider using the festival script <command>scfg_parse</command>
76 which does proper tokenization and part of speech tagging, before
77 passing it to a SCFG.
78 
79  */
80 
81 //@}
82 
83 /**@name OPTIONS
84  */
85 //@{
86 
87 //@options
88 
89 //@}
90 
91 int main(int argc, char **argv)
92 {
93 
94  scfg_parse_main(argc,argv);
95 
96  exit(0);
97  return 0;
98 }
99 
100 static int scfg_parse_main(int argc, char **argv)
101 {
102  // Top level function generates a probabilistic grammar
103  EST_Option al;
104  EST_StrList files;
105  EST_SCFG_Chart chart;
106  LISP rules,s,parse;
107  FILE *corpus,*output;
108  int i;
109 
110  parse_command_line
111  (argc, argv,
112  EST_String("[options]\n")+
113  "Summary: Parse a corpus with a stochastic context free grammar\n"+
114  "-grammar <ifile> Grammar file, one rule per line.\n"+
115  "-corpus <ifile> Corpus file, one bracketed sentence per line.\n"+
116  "-brackets Output bracketing only.\n"+
117  "-o <ofile> Output file for parsed sentences.\n",
118  files, al);
119 
120  if (al.present("-o"))
121  outfile = al.val("-o");
122  else
123  outfile = "-";
124 
125  siod_init();
126 
127  if (al.present("-grammar"))
128  {
129  rules = vload(al.val("-grammar"),1);
130  gc_protect(&rules);
131  }
132  else
133  {
134  cerr << "scfg_parse: no grammar specified" << endl;
135  exit(1);
136  }
137 
138  if (al.present("-corpus"))
139  {
140  if ((corpus = fopen(al.val("-corpus"),"r")) == NULL)
141  {
142  cerr << "scfg_parse: can't open corpus file \"" <<
143  al.val("-corpus") << "\" for reading " << endl;
144  exit(1);
145  }
146  }
147  else
148  {
149  cerr << "scfg_parse: no corpus specified" << endl;
150  exit(1);
151  }
152 
153  if (al.present("-o"))
154  {
155  if ((output=fopen(al.val("-o"),"w")) == NULL)
156  {
157  cerr << "scfg_parse: can't open output file \"" <<
158  al.val("-o") << "\" for writing " << endl;
159  exit(1);
160  }
161  }
162  else
163  output = stdout;
164 
165  gc_protect(&s);
166  gc_protect(&parse);
167  for (i=0; ((s=lreadf(corpus)) != get_eof_val()); i++)
168  {
169  parse = scfg_parse(s,rules);
170  if (al.present("-brackets"))
171  {
172  LISP bparse = scfg_bracketing_only(parse);
173  if (bparse == NIL)
174  bparse = s;
175  pprint_to_fd(output,bparse);
176  }
177  else
178  pprint_to_fd(output,parse);
179  if (i%100 == 99)
180  user_gc(NIL);
181  }
182 
183  if (output != stdout)
184  fclose(output);
185  gc_unprotect(&s);
186  gc_unprotect(&parse);
187  gc_unprotect(&rules);
188 
189  return 0;
190 }
191