Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
XML_Parser.h
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33 
34 
35 #ifndef __XML_PARSER_H__
36 #define __XML_PARSER_H__
37 
38 #if !defined(CHAR_SIZE)
39 # define CHAR_SIZE 8
40 #endif
41 
42 #if (CHAR_SIZE!=8)
43 # error EST can only handle 8 bit characters
44 #endif
45 
46 #include "EST_String.h"
47 #include "EST_Regex.h"
48 #include "EST_TKVL.h"
49 #include "EST_THash.h"
50 #include "EST_TDeque.h"
51 #include "EST_TList.h"
52 #include "rxp/rxp.h"
53 
54 // We only use types and functions from rxp.h, so we can throw away
55 // some of the macros which cause problems.
56 
57 #undef get
58 
59 
60 /**@name XML Parser
61  * Recursive descent parsing skeleton with hooks for processing.
62  * A C++ wrapper around the rxp parser.
63  *
64  * @author Richard Caley <rjc@cstr.ed.ac.uk>
65  * @version $Id: XML_Parser.h,v 1.3 2004/05/04 00:00:17 awb Exp $
66  */
67 //@{
68 
69 class XML_Parser;
70 class XML_Parser_Class;
71 
72 /// Nice name for list of attribute-value pairs.
74 
75 /** A class of parsers. All parsers in a class share callbacks and a
76  * list of known public IDs.
77  */
79 
80 private:
81 
82  /** Map PUBLIC and SYSTEM IDs to places on the local system.
83  */
85 
86 protected:
87  /** Do any necessary remappings and open a stream which reads the given
88  * entity.
89  */
90  static InputSource open_entity(Entity ent, void *arg);
91 
92 
93  /**@name The callbacks.
94  *
95  * These methods can be overridden in a subclass to create a class
96  * of parsers to do whatever you want.
97  */
98  //@{
99 
100  /** Called when starting a document.
101  */
102  virtual void document_open(XML_Parser_Class &c,
103  XML_Parser &p,
104  void *data);
105 
106  /** Called at the end of a document.
107  */
108  virtual void document_close(XML_Parser_Class &c,
109  XML_Parser &p,
110  void *data);
111 
112  /** Called when an element starts.
113  */
114  virtual void element_open(XML_Parser_Class &c,
115  XML_Parser &p,
116  void *data,
117  const char *name,
118  XML_Attribute_List &attributes);
119 
120  /** Called when an element ends.
121  */
122  virtual void element_close(XML_Parser_Class &c,
123  XML_Parser &p,
124  void *data,
125  const char *name);
126 
127  /** Called for empty elements.
128  *
129  * Defaults to element_open(...) followed by element_close(...).
130  */
131  virtual void element(XML_Parser_Class &c,
132  XML_Parser &p,
133  void *data,
134  const char *name,
135  XML_Attribute_List &attributes);
136 
137  /** Called for parsed character data sequences.
138  */
139  virtual void pcdata(XML_Parser_Class &c,
140  XML_Parser &p,
141  void *data,
142  const char *chars);
143  /** Called for unparsed character data sequences.
144  */
145  virtual void cdata(XML_Parser_Class &c,
146  XML_Parser &p,
147  void *data,
148  const char *chars);
149 
150  /** Called for processing instructions.
151  */
152  virtual void processing(XML_Parser_Class &c,
153  XML_Parser &p,
154  void *data,
155  const char *instruction);
156 
157  /** Called when there is an error in parsing.
158  */
159  virtual void error(XML_Parser_Class &c,
160  XML_Parser &p,
161  void *data);
162  //@}
163 
164  /** This can be called from any of the callbacks to present "message"
165  * as an error through the error callback, thus getting filename and
166  * line information into the message.
167  */
168  void error(XML_Parser_Class &c,
169  XML_Parser &p,
170  void *data,
171  EST_String message);
172 
173  /// Get the error message for the last error.
174  const char *get_error(XML_Parser &p);
175 
176 public:
177 
178  /** Create an object representing the class of parsers.
179  */
181 
182  virtual ~XML_Parser_Class() { }
183 
184  /** Add a mapping from entity ID (SYSTEM or PUBLIC) to filename.
185  *
186  * The string can contain escapes like \2 which are replaced by
187  * the text matching the Nth bracketed part of the regular expression.
188  */
189  void register_id(EST_Regex id_pattern, EST_String directory);
190 
191  /** Fill in the list with the known entity ID mappings.
192  */
193 
195 
196  /**@name Creating a parser
197  *
198  * Each of these methods creates a one-shot parser which will run over the
199  * indicated text.
200  */
201  //@{
202 
203  /// Create a parser for the RXP InputSource.
204  XML_Parser *make_parser(InputSource source, void *data);
205 
206  /// Create a parser for the RXP InputSource, given the initial entity.
207  XML_Parser *make_parser(InputSource source, Entity initial_entity, void *data);
208 
209  /// Create a parser for a stdio input stream.
210  XML_Parser *make_parser(FILE *input, void *data);
211 
212  /** Create a parser for a stdio input stream, giving a description for
213  * use in errors.
214  */
215  XML_Parser *make_parser(FILE *input, const EST_String desc, void *data);
216 
217  /// Create a parser for the named file.
218  XML_Parser *make_parser(const EST_String filename, void *data);
219 
220  //@}
221 
222  /** Utility which tries to open an entity called ID at places
223  * specified in the mapping of this parser class.
224  */
225 
226  InputSource try_and_open(Entity ent);
227 
228  /** XML_Parser defines the behaviour of an individual one-shot
229  * parser.
230  */
231  friend class XML_Parser;
232 };
233 
234 /** An actual parser. Each such instance parses just one stream which is
235  * given when the parser is created.
236  *
237  * The behaviour of the parser is given by the class to which it belongs.
238  */
239 
240 class XML_Parser {
241 
242 private:
243  /// Last error message from the parser.
244  EST_String p_error_message;
245 
246  /// Set true when context is being remembered.
247  bool p_track_context;
248 
249  /// Set true when contents are being remembered. (Tracking contents is not yet implemented.)
250  bool p_track_contents;
251 
252 protected:
253  /** The class to which this parser belongs. Defines the behaviour of
254  * the parser.
255  */
257 
258  /// The piece of markup being processed.
260 
261  /// Where we are reading from.
262  InputSource source;
263 
264  /** The entity we started from. May need to be freed at the end of the
265  * parse.
266  */
268 
269  /// Arbitrary data which can be used by callbacks.
270  void *data;
271 
272  /// The RXP parser object.
273  Parser p;
274 
275  /// If context is being tracked, this is a stack of element names.
277 
278 
279  /// Constructor used by XML_Parser_Class::make_parser(). Declared protected; XML_Parser_Class is a friend.
280  XML_Parser(XML_Parser_Class &parent,
281  InputSource source,
282  Entity initial_entity,
283  void *data);
284 
285  /// Open the entity ent. Asks the parser class to do the work.
286  InputSource open(Entity ent);
287 
288  /// Get the error message for the last error.
289  const char *get_error();
290 
291 public:
292 
293  /// Destructor, may close input if required.
294  ~XML_Parser();
295 
296  /** Request that the parser keep track of the currently open elements.
297  *
298  * These are recorded on a stack. Use context() to access the information.
299  */
300  void track_context(bool flag);
301  /** Keep track of the content of open elements.
302  *
303  * Not yet implemented.
304  */
305  void track_contents(bool flag);
306 
307  /** Get the name of the nth enclosing element.
308  *
309  * context(0) is the element we are directly inside.
310  */
311  EST_String context(int n);
312 
313  /// Run the parser.
314  void go();
315 
316  friend class XML_Parser_Class; // grants XML_Parser_Class access to the private and protected members above
317 };
318 
319 //@}
320 
321 #endif
322