Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
XML_Parser.h
1
/************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1996,1997 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/*************************************************************************/
33
34
35
#ifndef __XML_PARSER_H__
36
#define __XML_PARSER_H__
37
38
// Default to 8-bit characters unless CHAR_SIZE has already been defined.
#ifndef CHAR_SIZE
#  define CHAR_SIZE 8
#endif

// Any other character width is rejected at compile time.
#if CHAR_SIZE != 8
#  error EST can only handle 8 bit characters
#endif
45
46
#include "EST_String.h"
47
#include "EST_Regex.h"
48
#include "EST_TKVL.h"
49
#include "EST_THash.h"
50
#include "EST_TDeque.h"
51
#include "EST_TList.h"
52
#include "rxp/rxp.h"
53
54
// We only use types and functions from rxp.h, so we can throw away
55
// some of the macros which cause problems.
56
57
#undef get
58
59
60
/**@name XML Parser
 * Recursive descent parsing skeleton with hooks for processing.
 * A C++ wrapper around the rxp parser.
 *
 * @author Richard Caley <rjc@cstr.ed.ac.uk>
 * @version $Id: XML_Parser.h,v 1.3 2004/05/04 00:00:17 awb Exp $
 */
67
//@{
68
69
class
XML_Parser
;
70
class
XML_Parser_Class
;
71
72
/// Nice name for list of attribute-value pairs.
73
typedef
EST_TStringHash<EST_String>
XML_Attribute_List
;
74
75
/** A Class of parsers, All parsers share callbacks and a
76
* list of known public IDs.
77
*/
78
class
XML_Parser_Class
{
79
80
private
:
81
82
/** Map PUBLIC and SYSTEM IDs to places on the local system.
83
*/
84
EST_TKVL<EST_Regex, EST_String>
known_ids;
85
86
protected
:
87
/** Do any necessary remappings and open a stream which reads the given
88
* entity.
89
*/
90
static
InputSource
open_entity
(Entity ent,
void
*arg);
91
92
93
/**@name The callbacks.
94
*
95
* These methods can be overridden in a subclass to create a class
96
* of parsers to do whatever you want.
97
*/
98
//@{
99
100
/** Called when starting a document.
101
*/
102
virtual
void
document_open
(
XML_Parser_Class
&c,
103
XML_Parser
&p,
104
void
*data);
105
106
/** Called at the end of a document.
107
*/
108
virtual
void
document_close
(
XML_Parser_Class
&c,
109
XML_Parser
&p,
110
void
*data);
111
112
/** Called when an element starts.
113
*/
114
virtual
void
element_open
(
XML_Parser_Class
&c,
115
XML_Parser
&p,
116
void
*data,
117
const
char
*name,
118
XML_Attribute_List
&attributes);
119
120
/** Called when an element ends.
121
*/
122
virtual
void
element_close
(
XML_Parser_Class
&c,
123
XML_Parser
&p,
124
void
*data,
125
const
char
*name);
126
127
/** Called for empty elements.
128
*
129
* Defaults to element_open(...) followed by element_closed(...).
130
*/
131
virtual
void
element
(
XML_Parser_Class
&c,
132
XML_Parser
&p,
133
void
*data,
134
const
char
*name,
135
XML_Attribute_List
&attributes);
136
137
/** Called for parsed character data sequences.
138
*/
139
virtual
void
pcdata
(
XML_Parser_Class
&c,
140
XML_Parser
&p,
141
void
*data,
142
const
char
*chars);
143
/** Called for unparsed character data sequences.
144
*/
145
virtual
void
cdata
(
XML_Parser_Class
&c,
146
XML_Parser
&p,
147
void
*data,
148
const
char
*chars);
149
150
/** Called for processing directives.
151
*/
152
virtual
void
processing
(
XML_Parser_Class
&c,
153
XML_Parser
&p,
154
void
*data,
155
const
char
*instruction);
156
157
/** Called when there is an error in parsing.
158
*/
159
virtual
void
error
(
XML_Parser_Class
&c,
160
XML_Parser
&p,
161
void
*data);
162
//@}
163
164
/** This can be called from any of the callbacks to present "message"
165
* as an error through the error callback, thus getting filename and
166
* line information into the message.
167
*/
168
void
error
(
XML_Parser_Class
&c,
169
XML_Parser
&p,
170
void
*data,
171
EST_String
message);
172
173
/// Get the error message for the last error.
174
const
char
*
get_error
(
XML_Parser
&p);
175
176
public
:
177
178
/** Create an object representing the class of parsers.
179
*/
180
XML_Parser_Class
();
181
182
virtual
~
XML_Parser_Class
() { }
183
184
/** Add a mapping from entity ID (SYSTEM or PUBLIC) to filename.
185
*
186
* The string can contain escapes like \2 which are replaced by
187
* the text matching the Nth bracketed part of the regular expression.
188
*/
189
void
register_id
(
EST_Regex
id_pattern,
EST_String
directory);
190
191
/** Fill in the list with the known entity ID mappings.
192
*/
193
194
void
registered_ids
(
EST_TList<EST_String>
&list);
195
196
/**@name Creating a parser
197
*
198
* Each of these methods creates a one-shot parser which will run over the
199
* indicated text.
200
*/
201
//@{
202
203
/// Create a parser for the RXP InputSource.
204
XML_Parser
*
make_parser
(InputSource source,
void
*data);
205
206
/// Create a parser for the RXP InputSource.
207
XML_Parser
*
make_parser
(InputSource source, Entity initial_entity,
void
*data);
208
209
/// Create a parser for a stdio input stream.
210
XML_Parser
*
make_parser
(FILE *input,
void
*data);
211
212
/** Create a parser for a stdio input stream, giving a description for
213
* use in errors.
214
*/
215
XML_Parser
*
make_parser
(FILE *input,
const
EST_String
desc,
void
*data);
216
217
// Create a parser for the named file.
218
XML_Parser
*
make_parser
(
const
EST_String
filename,
void
*data);
219
220
//@}
221
222
/** Utility which tries to open an entity called ID at places
223
* specified in the mapping of this parser class.
224
*/
225
226
InputSource
try_and_open
(Entity ent);
227
228
/** XML_Parser defines the behaviour of an individual one-shot
229
* parser.
230
*/
231
friend
class
XML_Parser
;
232
};
233
234
/** An actual parser. Each such instance parses just one stream which is
235
* given when the parser is created.
236
*
237
* The behaviour of the parser is given by the class to which it belongs.
238
*/
239
240
class
XML_Parser
{
241
242
private
:
243
/// Last error message from the parser.
244
EST_String
p_error_message;
245
246
/// Set true when context is being remembered.
247
bool
p_track_context;
248
249
/// Set true when contents is being remembered. (not yet implemented)
250
bool
p_track_contents;
251
252
protected
:
253
/** The class to which this parser belongs. Defines the behaviour of
254
* the parser.
255
*/
256
XML_Parser_Class
*
pclass
;
257
258
/// The piece of markup being processed.
259
XBit
current_bit
;
260
261
/// Where we are reading from.
262
InputSource
source
;
263
264
/** The entity we started from. May need to be freed at the end of the
265
* parse.
266
*/
267
Entity
initial_entity
;
268
269
/// Arbitrary data which can be used by callbacks.
270
void
*
data
;
271
272
/// The RXP parser object.
273
Parser
p
;
274
275
/// If context is being tracked, this is a stack of element names.
276
EST_TDeque<EST_String>
p_context
;
277
278
279
/// Creator used by XML_Parser_Class::make_parser()
280
XML_Parser
(
XML_Parser_Class
&parent,
281
InputSource
source
,
282
Entity
initial_entity
,
283
void
*
data
);
284
285
/// Open. Asks the parser class to do the work.
286
InputSource
open
(Entity ent);
287
288
/// Get the error message for the last error.
289
const
char
*
get_error
();
290
291
public
:
292
293
/// Destructor, may close input if required.
294
~XML_Parser
();
295
296
/** Request that parser keep track of the currently open elements.
297
*
298
* These are recorded on a atsck. Use context() to access the information.
299
*/
300
void
track_context
(
bool
flag);
301
/** Keep track of the content of open elements.
302
*
303
* Not yet implemented.
304
*/
305
void
track_contents
(
bool
flag);
306
307
/** Get the name of the nth enclosing element.
308
*
309
* context(0) is the element we are directly inside.
310
*/
311
EST_String
context
(
int
n);
312
313
/// Run the parser.
314
void
go
();
315
316
friend
class
XML_Parser_Class
;
317
};
318
319
//@}
320
321
#endif
322
include
rxp
XML_Parser.h
Generated on Wed Dec 24 2014 09:16:35 for Edinburgh Speech Tools by
1.8.3.1