Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
XML_Parser.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* -------------------------------------------------------------------- */
36  /* Recursive descent parsing skeleton. */
37  /* */
38  /*************************************************************************/
39 
40 #include "EST_error.h"
41 #include "XML_Parser.h"
42 #include "rxp.h"
43 
45 {
46 }
47 
49 {
50  known_ids.add_item(id_pattern, directory);
51 }
52 
54 {
55  EST_Litem *p;
56 
57  for(p=known_ids.head(); p != 0; p= p->next())
58  {
59  EST_String re(known_ids.key(p).tostring());
60  EST_String &pattern = known_ids.val(p);
61 
62  list.append(re);
63  list.append(pattern);
64  }
65 }
66 
67 XML_Parser *XML_Parser_Class::make_parser(InputSource source, Entity ent, void *data)
68 {
69  return new XML_Parser(*this, source, ent, data);
70 }
71 
72 XML_Parser *XML_Parser_Class::make_parser(InputSource source, void *data)
73 {
74  return new XML_Parser(*this, source, NULL, data);
75 }
76 
77 
79  const EST_String desc,
80  void *data)
81 {
82  Entity ent = NewExternalEntity(0,0,strdup8(desc),0,0);
83 
84  FILE16 *input16=MakeFILE16FromFILE(input, "r");
85 
86  if (input16==NULL)
87  EST_sys_error("Can't open 16 bit '%s'", (const char *)desc);
88 
89  SetCloseUnderlying(input16, 0);
90 
91  return make_parser(NewInputSource(ent, input16), ent, data);
92 }
93 
94 
96  void *data)
97 {
98  return make_parser(input, "<ANONYMOUS>", data);
99 }
100 
101 
103  void *data)
104 {
105  if ( filename == "-" )
106  return make_parser(stdin, data);
107 
108  FILE *input = fopen(filename, "r");
109 
110  if (input==NULL)
111  EST_sys_error("Can't open '%s'", (const char *)filename);
112 
113  Entity ent = NewExternalEntity(0,0,strdup8(filename),0,0);
114 
115  FILE16 *input16=MakeFILE16FromFILE(input, "r");
116 
117  if (input16==NULL)
118  EST_sys_error("Can't open 16 bit '%s'", (const char *)filename);
119 
120  SetCloseUnderlying(input16, 1);
121 
122  return make_parser(NewInputSource(ent, input16), data);
123 }
124 
125 InputSource XML_Parser_Class::try_and_open(Entity ent)
126 
127 {
128  EST_String id = ent->publicid?ent->publicid:ent->systemid;
129  EST_Litem *p;
130 
131  int starts[EST_Regex_max_subexpressions];
132  int ends[EST_Regex_max_subexpressions];
133  for (p = known_ids.head(); p != 0; p = p->next())
134  {
135  EST_Regex &re = known_ids.key(p);
136  EST_String pattern(known_ids.val(p));
137 
138  if (id.matches(re, 0, starts, ends))
139  {
140  EST_String res(pattern);
141  res.subst(id, starts, ends);
142 
143  FILE *f;
144  FILE16 *f16;
145  if((f = fopen(res, "r")))
146  {
147  if(!(f16 = MakeFILE16FromFILE(f, "r")))
148  return 0;
149  SetCloseUnderlying(f16, 1);
150 
151  return NewInputSource(ent, f16);
152  }
153  }
154  }
155 
156  return EntityOpen(ent);
157 }
158 
159 
160 InputSource XML_Parser_Class::open_entity(Entity ent, void *arg)
161 {
162  XML_Parser *parser = (XML_Parser *)arg;
163 
164  return parser->open(ent);
165 }
166 
167 // Default do-nothing callbacks.
168 
170  XML_Parser &p,
171  void *data)
172 { (void)c; (void)p; (void)data; }
173 
175  XML_Parser &p,
176  void *data)
177 { (void)c; (void)p; (void)data; }
178 
180  XML_Parser &p,
181  void *data,
182  const char *name,
183  XML_Attribute_List &attributes)
184 { (void)c; (void)p; (void)data; (void)name; (void)attributes; }
185 
187  XML_Parser &p,
188  void *data,
189  const char *name,
190  XML_Attribute_List &attributes)
191 { (void)c; (void)p; (void)data; (void)name; (void)attributes;
192  element_open(c, p, data, name, attributes);
193  element_close(c, p, data, name);
194 }
195 
197  XML_Parser &p,
198  void *data,
199  const char *name)
200 { (void)c; (void)p; (void)data; (void)name; }
201 
203  XML_Parser &p,
204  void *data,
205  const char *chars)
206 { (void)c; (void)p; (void)data; (void)chars; }
207 
209  XML_Parser &p,
210  void *data,
211  const char *chars)
212 { (void)c; (void)p; (void)data; (void)chars; }
213 
215  XML_Parser &p,
216  void *data,
217  const char *instruction)
218 { (void)c; (void)p; (void)data; (void)instruction; }
219 
221  XML_Parser &p,
222  void *data)
223 { (void)c; (void)p; (void)data; }
224 
226 {
227  return p.get_error();
228 }
229 
231  XML_Parser &p,
232  void *data,
233  EST_String message)
234 {
235  if (p.current_bit != NULL)
236  p.current_bit->error_message = message;
237  error(c, p, data);
238 }
239 
240  /*************************************************************************/
241  /* */
242  /* An actual parser. */
243  /* */
244  /*************************************************************************/
245 
247  InputSource s,
248  Entity ent,
249  void *d)
250 {
251  pclass=&pc;
252  source=s;
253  initial_entity=ent;
254  data=d;
255  p = NewParser();
256  ParserSetEntityOpener(p, XML_Parser_Class::open_entity);
257  ParserSetFlag(p, ReturnDefaultedAttributes, 1);
258  ParserSetCallbackArg(p, (void *)this);
259 }
260 
262 {
263  if (initial_entity)
264  FreeEntity(initial_entity);
265  FreeDtd(p->dtd);
266  FreeParser(p);
267 }
268 
269 InputSource XML_Parser::open(Entity ent)
270 {
271  return pclass->try_and_open(ent);
272 }
273 
275 {
276 
277  if (p_track_context)
278  p_context.clear();
279 
280  if (ParserPush(p, source) == -1)
281  EST_error("XML Parser error in push");
282 
283  pclass->document_open(*pclass, *this, data);
284 
285  XBit bit;
286  while (1)
287  {
288  current_bit = bit = ReadXBit(p);
289  if (bit->type == XBIT_eof)
290  break;
291  else if (bit->type == XBIT_start || bit->type == XBIT_empty)
292  {
293  Attribute b;
294  XML_Attribute_List att(10);
295 
296  for (b=bit->attributes; b; b=b->next)
297  {
298  att.add_item(EST_String(b->definition->name), EST_String(b->value));
299  }
300 
301  if (bit->type == XBIT_start)
302  {
304  *this,
305  data,
306  bit->element_definition->name,
307  att
308  );
309  if (p_track_context)
310  {
311  EST_String nm(bit->element_definition->name);
312  p_context.push(nm);
313  }
314 
315  }
316  else
317  pclass->element(*pclass,
318  *this,
319  data,
320  bit->element_definition->name,
321  att
322  );
323  }
324  else if (bit->type == XBIT_end)
325  {
326  if (p_track_context)
327  p_context.pop();
328 
330  *this,
331  data,
332  bit->element_definition->name
333  );
334  }
335  else if (bit->type == XBIT_pcdata)
336  {
337  pclass->pcdata(*pclass,
338  *this,
339  data,
340  bit->pcdata_chars
341  );
342  }
343  else if (bit->type == XBIT_cdsect)
344  {
345  pclass->cdata(*pclass,
346  *this,
347  data,
348  bit->cdsect_chars
349  );
350  }
351  else if (bit->type == XBIT_pi)
352  {
354  *this,
355  data,
356  bit->pi_chars
357  );
358  }
359  else if (bit->type == XBIT_error)
360  {
361  pclass->error(*pclass,
362  *this,
363  data);
364  break;
365  }
366  else
367  {
368  // ignore it
369  }
370  FreeXBit(bit);
371  current_bit=NULL;
372  }
373 
374  if (current_bit!=NULL)
375  {
376  FreeXBit(bit);
377  current_bit=NULL;
378  }
379 
380  pclass->document_close(*pclass, *this, data);
381 }
382 
384 {
385  p_track_context=flag;
386 }
387 
389 {
390  p_track_contents=flag;
391 }
392 
393 
394 // Stolen from xmlparser.c, will need to be tweaked for internal rxp changes.
396 {
397  int linenum, charnum;
398  InputSource s;
399  XBit bit = current_bit;
400 
401  if (!bit)
402  return "No Parse In Progress";
403 
404  p_error_message =
406  bit->type == XBIT_error ? "Error" : "Warning",
407  ": ",
408  bit->error_message?bit->error_message:"non XML error"
409  );
410 
411  for(s=p->source; s; s=s->parent)
412  {
413  if(s->entity->name)
414  {
415  p_error_message += " in entity \"";
416  p_error_message += s->entity->name;
417  p_error_message += "\"";
418  }
419  else
420  p_error_message += " in unnamed entity";
421 
422  switch(SourceLineAndChar(s, &linenum, &charnum))
423  {
424  case 1:
425  p_error_message += EST_String::cat(" at line ",
426  EST_String::Number(linenum+1),
427  " char ",
428  EST_String::Number(charnum+1),
429  " of ");
430  break;
431  case 0:
432  p_error_message += EST_String::cat(" defined at line ",
433  EST_String::Number(linenum+1),
434  " char ",
435  EST_String::Number(charnum+1),
436  " of ");
437  break;
438  case -1:
439  p_error_message += " defined in ";
440  break;
441  }
442 
443  p_error_message += EntityDescription(s->entity);
444  p_error_message += "\n";
445  }
446 
447  return (const char *)p_error_message;
448 }
449 
451 {
452  return p_context.nth(n);
453 }
454