Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
xmlparser.h
1 /*************************************************************************/
2 /* */
3 /* Copyright (c) 1997-98 Richard Tobin, Language Technology Group, HCRC, */
4 /* University of Edinburgh. */
5 /* */
6 /* THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, */
7 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
8 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
9 /* IN NO EVENT SHALL THE AUTHOR OR THE UNIVERSITY OF EDINBURGH BE LIABLE */
10 /* FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF */
11 /* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION */
12 /* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
13 /* */
14 /*************************************************************************/
15 /* $Id: xmlparser.h,v 1.2 2001/04/04 13:11:27 awb Exp $ */
16 
17 #ifndef XMLPARSER_H
18 #define XMLPARSER_H
19 
20 #include "dtd.h"
21 #include "input.h"
22 
23 /* Typedefs */
24 
25 typedef struct parser_state *Parser;
26 typedef struct attribute *Attribute;
27 typedef struct content_particle *ContentParticle;
28 typedef struct xbit *XBit;
29 typedef void CallbackProc(XBit bit, void *arg);
30 typedef InputSource EntityOpenerProc(Entity e, void *arg);
31 
32 /* Bits */
33 
/*
 * The kinds of "bit" a struct xbit can carry (stored in its `type` field).
 *
 * Enumerators are numbered consecutively from 0.  XBIT_enum_count comes
 * last, so its value equals the number of enumerators before it; it is
 * used as the dimension of the XBitTypeName array.
 */
enum xbit_type {
    XBIT_dtd,
    XBIT_start,
    XBIT_empty,
    XBIT_end,
    XBIT_eof,
    XBIT_pcdata,
    XBIT_pi,
    XBIT_comment,
    XBIT_cdsect,
    XBIT_xml,
    XBIT_error,
    XBIT_warning,
    XBIT_none,
    XBIT_enum_count     /* count of the enumerators above, not a bit type */
};
typedef enum xbit_type XBitType;
42 
43 extern XML_API const char8 *XBitTypeName[XBIT_enum_count];
44 
45 struct attribute {
46  AttributeDefinition definition; /* The definition of this attribute */
47  Char *value; /* The (possibly normalised) value */
48  int quoted; /* Was it quoted? */
49  struct attribute *next; /* The next attribute or null */
50 };
51 
/*
 * Kind of a content particle (the `type` field of struct content_particle).
 * Enumerators are numbered consecutively from 0.
 */
enum cp_type {
    CP_pcdata,
    CP_name,
    CP_seq,
    CP_choice
};
typedef enum cp_type CPType;
56 
58  enum cp_type type;
59  int repetition;
60  Char *name;
61  int nchildren;
62  struct content_particle **children;
63 };
64 
65 struct xbit {
66  Entity entity;
67  int byte_offset;
68  enum xbit_type type;
69  char8 *s1, *s2;
70  Char *S1, *S2;
71  int i1, i2;
72  Attribute attributes;
73  ElementDefinition element_definition;
74 #ifndef FOR_LT
75  int nchildren;
76  struct xbit *parent;
77  struct xbit **children;
78 #endif
79 };
80 
/*
 * Member-name aliases for the generic slots of struct xbit, so that code
 * can write bit->pcdata_chars instead of bit->S1, and so on.
 *
 * Each group is presumably meant for the bit type its prefix suggests
 * (pcdata, pi, comment, cdsect, xml, error) -- confirm against the users.
 */

#define pcdata_chars S1

#define pi_name S1
#define pi_chars S2

#define comment_chars S1

#define cdsect_chars S1

#define xml_version s1
#define xml_encoding_name s2
#define xml_standalone i1
#define xml_encoding i2

#define error_message s1
96 
97 /* Parser flags */
98 
/*
 * Parser option flags.
 *
 * Each enumerator is a bit POSITION, not a mask: ParserGetFlag tests
 * bit (1 << flag) of the parser's `unsigned int flags` word.  The order
 * below therefore fixes the bit layout and must not be changed.
 *
 * NOTE(review): NormaliseAttributeValues and NormalizeAttributeValues are
 * two distinct enumerators (values 6 and 7), not aliases of each other.
 */
enum parser_flag {
    ExpandCharacterEntities,
    ExpandGeneralEntities,
    XMLPiEnd,
    XMLEmptyTagEnd,
    XMLPredefinedEntities,
    ErrorOnUnquotedAttributeValues,
    NormaliseAttributeValues,
    NormalizeAttributeValues,
    ErrorOnBadCharacterEntities,
    ErrorOnUndefinedEntities,
    ReturnComments,
    CaseInsensitive,
    ErrorOnUndefinedElements,
    WarnOnUndefinedElements,
    ErrorOnUndefinedAttributes,
    WarnOnUndefinedAttributes,
    WarnOnRedefinitions,
    TrustSDD,
    XMLExternalIDs,
    ReturnDefaultedAttributes,
    MergePCData,
    XMLMiscWFErrors,
    XMLStrictWFErrors,
    AllowMultipleElements,
    CheckEndTagsMatch,
    IgnoreEntities,
    XMLLessThan
};
typedef enum parser_flag ParserFlag;
129 
130 /* Parser */
131 
/*
 * Values held in the `state` field of struct parser_state.
 * Enumerators are numbered consecutively from 0.
 */
enum parse_state {
    PS_prolog1,
    PS_prolog2,
    PS_body,
    PS_epilog,
    PS_end,
    PS_error
};
134 
135 struct element_info {
136  ElementDefinition definition;
137  Entity entity;
138 };
139 
140 struct parser_state {
141  enum parse_state state;
142  Entity document_entity;
143  int have_dtd; /* True if dtd has been processed */
144  StandaloneDeclaration standalone;
145  struct input_source *source;
146  Char *name, *pbuf;
147  int namelen, pbufsize, pbufnext;
148  struct xbit xbit;
149  int peeked;
150  Dtd dtd; /* The document's DTD */
151  CallbackProc *dtd_callback;
152  CallbackProc *warning_callback;
153  EntityOpenerProc *entity_opener;
154  unsigned int flags;
155  struct element_info *element_stack;
156  int element_stack_alloc;
157  int element_depth;
158  void *callback_arg;
159  int external_pe_depth; /* To keep track of whether we're in the */
160  /* internal subset: 0 <=> yes */
161 };
162 
163 XML_API int ParserInit(void);
164 XML_API Parser NewParser(void);
165 XML_API void FreeParser(Parser p);
166 
167 XML_API Entity ParserRootEntity(Parser p);
168 XML_API InputSource ParserRootSource(Parser p);
169 
170 XML_API XBit ReadXBit(Parser p);
171 XML_API XBit PeekXBit(Parser p);
172 XML_API void FreeXBit(XBit xbit);
173 
174 #ifndef FOR_LT
175 XBit ReadXTree(Parser p);
176 void FreeXTree(XBit tree);
177 #endif
178 
179 XML_API XBit ParseDtd(Parser p, Entity e);
180 
181 XML_API void ParserSetWarningCallback(Parser p, CallbackProc cb);
182 XML_API void ParserSetDtdCallback(Parser p, CallbackProc cb);
183 XML_API void ParserSetEntityOpener(Parser p, EntityOpenerProc opener);
184 XML_API void ParserSetCallbackArg(Parser p, void *arg);
185 
186 XML_API int ParserPush(Parser p, InputSource source);
187 XML_API void ParserPop(Parser p);
188 
189 XML_API void ParserSetFlag(Parser p, ParserFlag flag, int value);
/*
 * Test one flag of parser p.
 *
 * `flag` is a bit position (a ParserFlag value).  The result is the masked
 * bit itself: non-zero when the flag is set, 0 when it is clear; it is not
 * normalised to 1.
 *
 * The mask is built from an unsigned 1 so that the shift is done in the
 * same unsigned type as `flags`, rather than in signed int.
 */
#define ParserGetFlag(p, flag) ((p)->flags & (1u << (flag)))
191 
192 XML_API void ParserPerror(Parser p, XBit bit);
193 
194 #endif /* XMLPARSER_H */