18 static char vcid[] =
"$Id: xmlparser.c,v 1.3 2004/05/04 00:00:17 awb Exp $";
31 #include "lt-memory.h"
35 #define Realloc srealloc
50 #include "xmlparser.h"
52 static int transcribe(Parser p,
int back,
int count);
53 static void pop_while_at_eoe(Parser p);
54 static void maybe_uppercase(Parser p, Char *s);
55 static void maybe_uppercase_name(Parser p);
56 static int str_maybecase_cmp8(Parser p,
const char8 *a,
const char8 *b);
57 static int is_ascii_alpha(
int c);
58 static int is_ascii_digit(
int c);
59 static int parse_external_id(Parser p,
int required,
60 char8 **publicid, char8 **systemid,
62 static int parse_conditional(Parser p);
63 static int parse_notation_decl(Parser p);
64 static int parse_entity_decl(Parser p, Entity ent,
int line,
int chpos);
65 static int parse_attlist_decl(Parser p);
66 static int parse_element_decl(Parser p);
67 static ContentParticle parse_cp(Parser p);
68 static ContentParticle parse_choice_or_seq(Parser p);
69 static ContentParticle parse_choice_or_seq_1(Parser p,
int nchildren,
char sep);
70 static int check_content_decl(Parser p, ContentParticle cp);
71 static int check_content_decl_1(Parser p, ContentParticle cp);
72 static Char *stringify_cp(ContentParticle cp);
73 static void print_cp(ContentParticle cp, FILE16 *f);
74 static int size_cp(ContentParticle cp);
75 void FreeContentParticle(ContentParticle cp);
76 static int parse_reference(Parser p,
int pe,
int expand,
int allow_external);
77 static int parse_character_reference(Parser p,
int expand);
78 static const char8 *escape(
int c);
79 static int parse_name(Parser p,
const char8 *where);
80 static int parse_nmtoken(Parser p,
const char8 *where);
81 static int looking_at(Parser p,
const char8 *
string);
82 static void clear_xbit(XBit
xbit);
83 static int expect(Parser p,
int expected,
const char8 *where);
84 static int expect_dtd_whitespace(Parser p,
const char8 *where);
85 static void skip_whitespace(InputSource s);
86 static int skip_dtd_whitespace(Parser p,
int allow_pe);
87 static int parse_cdata(Parser p);
88 static int process_nsl_decl(Parser p);
89 static int process_xml_decl(Parser p);
90 static int parse_dtd(Parser p);
91 static int read_markupdecls(Parser p);
92 static int error(Parser p,
const char8 *format, ...);
93 static void warn(Parser p,
const char8 *format, ...);
94 static void verror(XBit bit,
const char8 *format, va_list args);
95 enum literal_type {LT_cdata_attr, LT_tok_attr, LT_plain, LT_entity};
96 static int parse_string(Parser p,
const char8 *where,
enum literal_type type);
97 static int parse_pi(Parser p);
98 static int parse_comment(Parser p,
int skip);
99 static int parse_pcdata(Parser p);
100 static int parse_starttag(Parser p);
101 static int parse_attribute(Parser p);
102 static int parse_endtag(Parser p);
103 static int parse_markup(Parser p);
104 static int parse(Parser p);
105 static int parse_markupdecl(Parser p);
/*
 * Error-propagation helpers.
 *
 *   require(x);   evaluate x once; if the result is negative, return -1
 *                 from the enclosing function.
 *   require0(x);  same test, but return 0 (for functions whose failure
 *                 value is a null pointer / zero).
 *
 * The argument is parenthesised so that an expression containing
 * operators of lower precedence than >= (?:, &&, ||, assignment) is
 * compared with 0 as a whole instead of only its last operand.
 *
 * The "if ... {} else return" shape lets the caller's trailing ';'
 * terminate the statement, and keeps a following `else` in the caller
 * attached to the caller's own `if`.
 */
#define require(x) if((x) >= 0) {} else return -1
#define require0(x) if((x) >= 0) {} else return 0
/*
 * Consume(buf): forget a buffer without freeing it.  Sets `buf` to null
 * and its companion size variable (the name formed by pasting "size"
 * onto the last token of `buf`, e.g. p->pbuf -> p->pbufsize) to 0.
 * The expression evaluates to 0.
 * NOTE(review): presumably used once ownership of the storage has been
 * handed elsewhere -- confirm at the call sites.
 */
#define Consume(buf) \
    ((buf) = 0, buf##size = 0)
/*
 * ExpandBuf(buf, sz): make sure `buf` has room for at least sz+1 Chars
 * (sz characters plus a terminator).  The current capacity lives in the
 * companion variable formed by pasting "size" onto the last token of
 * `buf` (p->pbuf -> p->pbufsize).
 *
 * Must be used as a statement inside a function that has a Parser `p`
 * in scope and returns int: on allocation failure it executes
 * `return error(p, "System error")`.
 *
 * Differences from a naive one-liner:
 *  - `sz` is parenthesised everywhere, so arguments containing
 *    low-precedence operators size the buffer correctly;
 *  - the new pointer is held in a temporary, so a failed Realloc does
 *    not overwrite (and leak) the old buffer, and the recorded size is
 *    only updated on success.  This assumes Realloc follows the usual
 *    realloc contract of leaving the old block valid on failure.
 *
 * `sz` is evaluated more than once; do not pass expressions with side
 * effects.
 */
#define ExpandBuf(buf, sz) \
    do { \
        if(buf##size < (sz) + 1) \
        { \
            void *expanded_ = Realloc(buf, ((sz) + 1) * sizeof(Char)); \
            if(!expanded_) \
                return error(p, "System error"); \
            buf = expanded_; \
            buf##size = (sz) + 1; \
        } \
    } while(0)
/*
 * CopyName(n): store in `n` a freshly Malloc'd, 0-terminated copy of the
 * name most recently parsed (p->name, p->namelen Chars).
 *
 * Needs a Parser `p` in scope and an enclosing function returning int:
 * when Malloc fails it executes `return error(p, "System error")`.
 *
 * The trailing ';' is part of the macro, so `CopyName(x);` expands to
 * the statement followed by an empty statement; do not use it as the
 * unbraced body of an `if` that has an `else`.
 */
#define CopyName(n) \
    if(((n) = Malloc((p->namelen + 1) * sizeof(Char))) != 0) \
    { \
        memcpy((n), p->name, p->namelen * sizeof(Char)); \
        (n)[p->namelen] = 0; \
    } \
    else \
        return error(p, "System error");
/*
 * CopyName0(n): same copy as CopyName -- `n` receives a freshly Malloc'd,
 * 0-terminated copy of p->name (p->namelen Chars) -- but for functions
 * whose failure value is 0: when Malloc fails it calls
 * error(p, "System error") and then returns 0 from the enclosing
 * function.
 *
 * Needs a Parser `p` in scope.  Unlike CopyName there is no trailing
 * ';' inside the macro.
 */
#define CopyName0(n) \
    if(((n) = Malloc((p->namelen + 1) * sizeof(Char))) != 0) \
    { \
        memcpy((n), p->name, p->namelen * sizeof(Char)); \
        (n)[p->namelen] = 0; \
    } \
    else \
    { \
        error(p, "System error"); \
        return 0; \
    }
118 const char8 *XBitTypeName[XBIT_enum_count] = {
134 static Entity xml_builtin_entity;
135 static Entity xml_predefined_entities;
139 static int initialised = 0;
142 static const Char lt[] = {
'l',
't',0}, ltval[] = {
'&',
'#',
'6',
'0',
';',0};
143 static const Char gt[] = {
'g',
't',0}, gtval[] = {
'&',
'#',
'6',
'2',
';',0};
144 static const Char amp[] = {
'a',
'm',
'p',0},
145 ampval[] = {
'&',
'#',
'3',
'8',
';',0};
146 static const Char apos[] = {
'a',
'p',
'o',
's',0}, aposval[] = {
'\'',0};
147 static const Char quot[] = {
'q',
'u',
'o',
't',0}, quotval[] = {
'"',0};
148 static const Char *builtins[5][2] = {
149 {lt, ltval}, {gt, gtval}, {amp, ampval},
150 {apos, aposval}, {quot, quotval}
162 for(i=0, f=0; i<5; i++, f=e)
164 e = NewInternalEntity(builtins[i][0], builtins[i][1],
165 xml_builtin_entity, 0, 0, 0);
171 xml_predefined_entities = e;
176 static void skip_whitespace(InputSource s)
180 while((c =
get(s)) != XEOE && is_xml_whitespace(c))
191 static int skip_dtd_whitespace(Parser p,
int allow_pe)
195 InputSource s = p->source;
208 "PE end not allowed here in internal subset");
209 if(s->entity->type == ET_external)
210 p->external_pe_depth--;
220 else if(is_xml_whitespace(c))
229 c =
get(s); unget(s);
230 if(c != XEOE && is_xml_namestart(c))
236 "PE ref not allowed here in internal subset");
238 require(parse_reference(p, 1, 1, 1));
240 if(s->entity->type == ET_external)
241 p->external_pe_depth++;
258 static int expect(Parser p,
int expected,
const char8 *where)
261 InputSource s = p->source;
267 return error(p,
"Expected %s %s, but got %s",
268 escape(expected), where, escape(c));
278 static int expect_dtd_whitespace(Parser p,
const char8 *where)
280 int r = skip_dtd_whitespace(p, p->external_pe_depth > 0);
286 return error(p,
"Expected whitespace %s", where);
291 static void clear_xbit(XBit
xbit)
293 xbit->type = XBIT_none;
294 xbit->s1 = xbit->s2 = 0;
295 xbit->S1 = xbit->S2 = 0;
296 xbit->attributes = 0;
297 xbit->element_definition = 0;
300 void FreeXBit(XBit
xbit)
304 if(xbit->S1) Free(xbit->S1);
305 if(xbit->S2) Free(xbit->S2);
306 if(xbit->type != XBIT_error && xbit->type != XBIT_warning && xbit->s1)
308 if(xbit->s2) Free(xbit->s2);
309 for(a = xbit->attributes; a; a = b)
312 if(a->value) Free(a->value);
329 static int looking_at(Parser p,
const char8 *
string)
331 InputSource s = p->source;
335 for(c = *
string++; c; c = *
string++)
342 if(c ==
' ' && *
string == 0)
344 if(d == XEOE || !is_xml_whitespace(d))
349 if((ParserGetFlag(p, CaseInsensitive) &&
350 Toupper(d) != Toupper(c)) ||
351 (!ParserGetFlag(p, CaseInsensitive) && d != c))
362 static int parse_name(Parser p,
const char8 *where)
364 InputSource s = p->source;
368 if(c == XEOE || !is_xml_namestart(c))
371 error(p,
"Expected name, but got %s %s", escape(c), where);
376 while(c =
get(s), (c != XEOE && is_xml_namechar(c)))
380 p->name = s->line + s->next - i;
386 static int parse_nmtoken(Parser p,
const char8 *where)
388 InputSource s = p->source;
391 while(c =
get(s), (c !=XEOE && is_xml_namechar(c)))
396 return error(p,
"Expected nmtoken value, but got %s %s",
399 p->name = s->line + s->next - i;
408 static const char8 *escape(
int c)
410 static char8 buf[5][15];
411 static int bufnum=-1;
418 bufnum = (bufnum + 1) % 5;
422 else if(c >= 33 && c <= 126)
423 sprintf(buf[bufnum],
"%c", c);
425 sprintf(buf[bufnum],
"<space>");
427 sprintf(buf[bufnum],
"<0x%x>", c);
432 Parser NewParser(
void)
436 if(ParserInit() == -1)
439 p = Malloc(
sizeof(*p));
442 p->state = PS_prolog1;
443 p->document_entity = 0;
445 p->standalone = SDD_unspecified;
448 clear_xbit(&p->xbit);
450 p->xbit.nchildren = 0;
451 p->xbit.children = 0;
454 p->pbufsize = p->pbufnext = 0;
458 p->dtd_callback = p->warning_callback = 0;
459 p->entity_opener = 0;
461 p->external_pe_depth = 0;
463 p->element_stack = 0;
464 p->element_stack_alloc = 0;
465 p->element_depth = 0;
467 ParserSetFlag(p, XMLPiEnd, 1);
468 ParserSetFlag(p, XMLEmptyTagEnd, 1);
469 ParserSetFlag(p, XMLPredefinedEntities, 1);
470 ParserSetFlag(p, XMLExternalIDs, 1);
471 ParserSetFlag(p, XMLMiscWFErrors, 1);
472 ParserSetFlag(p, ErrorOnUnquotedAttributeValues, 1);
473 ParserSetFlag(p, XMLLessThan, 1);
474 ParserSetFlag(p, IgnoreEntities, 0);
475 ParserSetFlag(p, ExpandGeneralEntities, 1);
476 ParserSetFlag(p, ExpandCharacterEntities, 1);
477 ParserSetFlag(p, NormaliseAttributeValues, 1);
478 ParserSetFlag(p, WarnOnUndefinedElements, 1);
479 ParserSetFlag(p, WarnOnUndefinedAttributes, 1);
480 ParserSetFlag(p, WarnOnRedefinitions, 1);
481 ParserSetFlag(p, TrustSDD, 1);
482 ParserSetFlag(p, ReturnComments, 1);
483 ParserSetFlag(p, CheckEndTagsMatch, 1);
488 void FreeParser(Parser p)
494 Free(p->element_stack);
498 InputSource ParserRootSource(Parser p)
502 for(s=p->source; s && s->parent; s = s->parent)
508 Entity ParserRootEntity(Parser p)
510 return ParserRootSource(p)->entity;
513 void ParserSetCallbackArg(Parser p,
void *arg)
515 p->callback_arg = arg;
518 void ParserSetDtdCallback(Parser p, CallbackProc cb)
520 p->dtd_callback = cb;
523 void ParserSetWarningCallback(Parser p, CallbackProc cb)
525 p->warning_callback = cb;
528 void ParserSetEntityOpener(Parser p, EntityOpenerProc opener)
530 p->entity_opener = opener;
535 XBit ReadXTree(Parser p)
537 XBit bit, tree, child;
548 if(!(tree = Malloc(
sizeof(*tree))))
550 error(p,
"System error");
556 child = ReadXTree(p);
566 error(p,
"EOF in element");
571 if(child->element_definition != tree->element_definition)
573 const Char *name1 = tree->element_definition->name,
574 *name2 = child->element_definition->name;
577 error(p,
"Mismatched end tag: expected </%S>, got </%S>",
585 children = Realloc(tree->children,
586 (tree->nchildren + 1) *
sizeof(XBit));
591 error(p,
"System error");
594 child->parent = tree;
595 children[tree->nchildren] = child;
597 tree->children = children;
603 if(!(tree = Malloc(
sizeof(*tree))))
605 error(p,
"System error");
613 void FreeXTree(XBit tree)
617 for(i=0; i<tree->nchildren; i++)
618 FreeXTree(tree->children[i]);
620 Free(tree->children);
624 if(tree->type == XBIT_error)
633 XBit ReadXBit(Parser p)
643 XBit PeekXBit(Parser p)
646 error(p,
"Attempt to peek twice");
656 int ParserPush(Parser p, InputSource source)
658 if(!p->source && !p->document_entity)
659 p->document_entity = source->entity;
661 source->parent = p->source;
664 if(source->entity->type == ET_internal)
670 if(source->entity->encoding == CE_unknown)
671 determine_character_encoding(source);
674 if(!EncodingIsAsciiSuperset(source->entity->encoding))
675 return error(p,
"Unsupported character encoding %s",
676 CharacterEncodingName[source->entity->encoding]);
678 if(source->entity->encoding == CE_unknown)
679 return error(p,
"Unknown character encoding");
682 get(source); unget(source);
684 source->entity->ml_decl = ML_unspecified;
685 if(looking_at(p,
"<?NSL "))
686 return process_nsl_decl(p);
687 if(looking_at(p,
"<?xml "))
689 require(process_xml_decl(p));
690 if(source->entity == p->document_entity &&
691 !source->entity->version_decl)
692 return error(p,
"XML declaration in document entity lacked "
694 if(source->entity != p->document_entity &&
695 source->entity->standalone_decl != SDD_unspecified)
696 return error(p,
"Standalone attribute not allowed except in "
700 else if(!ParserGetFlag(p, XMLStrictWFErrors) && looking_at(p,
"<?XML "))
702 warn(p,
"Found <?XML instead of <?xml; switching to case-"
704 ParserSetFlag(p, CaseInsensitive, 1);
705 return process_xml_decl(p);
711 void ParserPop(Parser p)
716 Fclose(source->file16);
717 p->source = source->parent;
719 if(source->entity->type == ET_external)
726 static int at_eoe(InputSource s)
730 if(s->seen_eoe || get_with_fill(s) == XEOE)
739 static void pop_while_at_eoe(Parser p)
743 InputSource s = p->source;
/*
 * Turn a parser option on or off.  Flags are kept as single bits of
 * p->flags, bit number == the ParserFlag value.
 * NOTE(review): the conditions choosing between the paired statements
 * below are not visible in this extract -- presumably `if(value)` /
 * `else`; confirm against the full file.
 */
756 void ParserSetFlag(Parser p, ParserFlag flag,
int value)
/* Set the flag's bit ... */
759 p->flags |= (1 << flag);
/* ... or clear it. */
761 p->flags &= ~(1 << flag);
/*
 * XMLPredefinedEntities has a side effect on the DTD: its
 * predefined-entity list is pointed at the shared
 * xml_predefined_entities chain, or detached (set to 0).
 * This dereferences p->dtd, so the DTD must exist before this flag is
 * changed.
 */
763 if(flag == XMLPredefinedEntities)
766 p->dtd->predefined_entities = xml_predefined_entities;
768 p->dtd->predefined_entities = 0;
772 void ParserPerror(Parser p, XBit bit)
774 int linenum, charnum;
777 Fprintf(Stderr,
"%s: %s\n",
778 bit->type == XBIT_error ?
"Error" :
"Warning",
782 for(s=p->source; s; s=s->parent)
785 Fprintf(Stderr,
" in entity \"%S\"", s->entity->name);
787 Fprintf(Stderr,
" in unnamed entity");
789 switch(SourceLineAndChar(s, &linenum, &charnum))
792 Fprintf(Stderr,
" at line %d char %d of", linenum+1, charnum+1);
795 Fprintf(Stderr,
" defined at line %d char %d of",
796 linenum+1, charnum+1);
799 Fprintf(Stderr,
" defined in");
803 Fprintf(Stderr,
" %s\n", EntityDescription(s->entity));
808 static int parse(Parser p)
813 if(p->state == PS_end || p->state == PS_error)
816 p->xbit.type = XBIT_eof;
820 clear_xbit(&p->xbit);
822 if(p->state <= PS_prolog2 || p->state == PS_epilog)
823 skip_whitespace(p->source);
828 SourcePosition(s, &p->xbit.entity, &p->xbit.byte_offset);
833 if(p->state != PS_epilog)
834 return error(p,
"Document ends too soon");
836 p->xbit.type = XBIT_eof;
839 return parse_markup(p);
841 if(ParserGetFlag(p, IgnoreEntities))
843 if(p->state <= PS_prolog2)
844 return error(p,
"Entity reference not allowed in prolog");
845 if(looking_at(p,
"#"))
851 if(ParserGetFlag(p, ExpandGeneralEntities))
854 require(parse_reference(p, 0, 1, 1));
862 return parse_pcdata(p);
868 static int parse_markup(Parser p)
870 InputSource s = p->source;
876 if(looking_at(p,
"--"))
878 if(ParserGetFlag(p, ReturnComments))
879 return parse_comment(p, 0);
882 require(parse_comment(p, 1));
886 else if(looking_at(p,
"DOCTYPE "))
888 else if(looking_at(p,
"[CDATA["))
889 return parse_cdata(p);
891 return error(p,
"Syntax error after <!");
894 return parse_endtag(p);
901 if(!ParserGetFlag(p, XMLLessThan) &&
902 (c == XEOE || !is_xml_namestart(c)))
907 return parse_pcdata(p);
909 return parse_starttag(p);
913 static int parse_endtag(Parser p)
915 ElementDefinition def;
918 p->xbit.type = XBIT_end;
919 require(parse_name(p,
"after </"));
920 maybe_uppercase_name(p);
922 if(ParserGetFlag(p, CheckEndTagsMatch))
924 if(p->element_depth <= 0)
925 return error(p,
"End tag </%.*S> outside of any element",
926 p->namelen, p->name);
928 ent = p->element_stack[--p->element_depth].entity;
929 def = p->element_stack[p->element_depth].definition;
931 if(p->namelen == def->namelen &&
932 memcmp(p->name, def->name, p->namelen *
sizeof(Char)) == 0)
933 p->xbit.element_definition = def;
935 return error(p,
"Mismatched end tag: expected </%S>, got </%.*S>",
936 def->name, p->namelen, p->name);
938 if(ent != p->source->entity)
939 return error(p,
"Element ends in different entity from that "
940 "in which it starts");
942 if(p->element_depth == 0)
943 p->state = PS_epilog;
947 p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
948 if(!p->xbit.element_definition)
949 return error(p,
"End tag for unknown element %.*S",
950 p->namelen, p->name);
953 skip_whitespace(p->source);
954 return expect(p,
'>',
"after name in end tag");
957 static int parse_starttag(Parser p)
961 if(p->state == PS_epilog && !ParserGetFlag(p, AllowMultipleElements))
962 return error(p,
"Document contains multiple elements");
966 require(parse_name(p,
"after <"));
967 maybe_uppercase_name(p);
969 p->xbit.element_definition = FindElementN(p->dtd, p->name, p->namelen);
970 if(!p->xbit.element_definition || p->xbit.element_definition->tentative)
972 if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedElements))
973 return error(p,
"Start tag for undeclared element %.*S",
974 p->namelen, p->name);
975 if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedElements))
976 warn(p,
"Start tag for undeclared element %.*S; "
977 "declaring it to have content ANY",
978 p->namelen, p->name);
979 if(p->xbit.element_definition)
980 RedefineElement(p->xbit.element_definition, CT_any, 0);
983 if(!(p->xbit.element_definition =
984 DefineElementN(p->dtd, p->name, p->namelen, CT_any, 0)))
985 return error(p,
"System error");
991 InputSource s = p->source;
997 if(c !=XEOE && is_xml_whitespace(c))
1003 !(ParserGetFlag(p, XMLEmptyTagEnd) && c ==
'/'))
1006 return error(p,
"Expected whitespace or tag end in start tag");
1011 p->xbit.type = XBIT_start;
1015 if((ParserGetFlag(p, XMLEmptyTagEnd)) && c ==
'/')
1017 require(expect(p,
'>',
"after / in start tag"));
1018 p->xbit.type = XBIT_empty;
1024 require(parse_attribute(p));
1027 if(ParserGetFlag(p, CheckEndTagsMatch))
1029 if(p->xbit.type == XBIT_start)
1031 if(p->element_depth == p->element_stack_alloc)
1033 p->element_stack_alloc =
1034 p->element_stack_alloc == 0 ? 20 :
1035 p->element_stack_alloc * 2;
1036 if(!(p->element_stack =
1037 Realloc(p->element_stack,
1038 (p->element_stack_alloc *
sizeof(*p->element_stack)))))
1039 return error(p,
"System error");
1041 p->element_stack[p->element_depth].definition =
1042 p->xbit.element_definition;
1043 p->element_stack[p->element_depth++].entity = p->source->entity;
1046 if(p->element_depth == 0)
1047 p->state = PS_epilog;
1050 if(ParserGetFlag(p, ReturnDefaultedAttributes))
1052 AttributeDefinition d;
1055 for(d=NextAttributeDefinition(p->xbit.element_definition, 0);
1057 d=NextAttributeDefinition(p->xbit.element_definition, d))
1059 if(!d->default_value)
1061 for(a=p->xbit.attributes; a; a=a->next)
1062 if(a->definition == d)
1066 if(!(a = Malloc(
sizeof(*a))))
1067 return error(p,
"System error");
1069 if(!(a->value = Strdup(d->default_value)))
1070 return error(p,
"System error");
1072 a->next = p->xbit.attributes;
1073 p->xbit.attributes = a;
1081 static int parse_attribute(Parser p)
1083 InputSource s = p->source;
1084 AttributeDefinition def;
1088 require(parse_name(p,
"for attribute"));
1089 maybe_uppercase_name(p);
1091 def = FindAttributeN(p->xbit.element_definition, p->name, p->namelen);
1094 if(p->have_dtd && ParserGetFlag(p, ErrorOnUndefinedAttributes))
1095 return error(p,
"Undeclared attribute %.*S for element %S",
1096 p->namelen, p->name, p->xbit.element_definition->name);
1097 if(p->have_dtd && ParserGetFlag(p, WarnOnUndefinedAttributes))
1098 warn(p,
"Undeclared attribute %.*S for element %S; "
1099 "declaring it as CDATA #IMPLIED",
1100 p->namelen, p->name, p->xbit.element_definition->name);
1101 if(!(def = DefineAttributeN(p->xbit.element_definition,
1102 p->name, p->namelen,
1103 AT_cdata, 0, DT_implied, 0)))
1104 return error(p,
"System error");
1107 for(a = p->xbit.attributes; a; a = a->next)
1108 if(a->definition == def)
1109 return error(p,
"Repeated attribute %.*S", p->namelen, p->name);
1111 if(!(a = Malloc(
sizeof(*a))))
1112 return error(p,
"System error");
1115 a->next = p->xbit.attributes;
1116 p->xbit.attributes = a;
1117 a->definition = def;
1120 require(expect(p,
'=',
"after attribute name"));
1130 require(parse_string(p,
"in attribute value",
1131 a->definition->type == AT_cdata ? LT_cdata_attr :
1137 if(ParserGetFlag(p, ErrorOnUnquotedAttributeValues))
1138 return error(p,
"Value of attribute is unquoted");
1140 require(parse_nmtoken(p,
"in unquoted attribute value"));
/*
 * Append `count` Chars from the current source line to the parse
 * buffer p->pbuf.
 *
 *   back   how far before the source's read position (line + next) the
 *          copied run starts
 *   count  number of Chars to copy
 *
 * Callers wrap this in require(), so a negative result means failure.
 * NOTE(review): the function's final return statement is not visible in
 * this extract -- presumably `return 0;`; confirm.
 */
1148 static int transcribe(Parser p,
int back,
int count)
/* Make room for the existing contents plus `count` more Chars;
   ExpandBuf returns error(p, "System error") from here if that fails. */
1150 ExpandBuf(p->pbuf, p->pbufnext + count);
/* Copy straight out of the source's line buffer to the write position. */
1151 memcpy(p->pbuf + p->pbufnext,
1152 p->source->line + p->source->next - back,
1153 count *
sizeof(Char));
/* Advance the write position; no terminator is written here. */
1154 p->pbufnext += count;
1160 static int parse_pcdata(Parser p)
1167 if(p->state <= PS_prolog2)
1168 return error(p,
"Character data not allowed in prolog");
1169 if(p->state == PS_epilog)
1170 return error(p,
"Character data not allowed after body");
1175 buflen = s->line_length;
1186 require(transcribe(p, count, count));
1191 if(!ParserGetFlag(p, MergePCData))
1194 pop_while_at_eoe(p);
1199 buflen = s->line_length;
1207 if(!ParserGetFlag(p, XMLLessThan))
1212 if(buf[next] !=
'!' && buf[next] !=
'/' && buf[next] !=
'?' &&
1213 !is_xml_namestart(buf[next]))
1219 require(transcribe(p, count+1, count));
1222 if(!ParserGetFlag(p, ReturnComments) &&
1223 buflen >= next + 3 &&
1224 buf[next] ==
'!' && buf[next+1] ==
'-' && buf[next+2] ==
'-')
1227 require(parse_comment(p, 1));
1228 buflen = s->line_length;
1238 if(ParserGetFlag(p, IgnoreEntities))
1240 if(!ParserGetFlag(p, MergePCData) &&
1241 (p->pbufnext > 0 || count > 0))
1250 require(transcribe(p, count, count));
1254 if(buflen >= next+1 && buf[next] ==
'#')
1261 require(transcribe(p, count+2, count));
1264 require(parse_character_reference(p,
1265 ParserGetFlag(p, ExpandCharacterEntities)));
1268 if(!ParserGetFlag(p, MergePCData))
1278 require(transcribe(p, count+1, count));
1281 require(parse_reference(p, 0,
1282 ParserGetFlag(p, ExpandGeneralEntities),
1286 buflen = s->line_length;
1289 if(!ParserGetFlag(p, MergePCData))
1294 if(ParserGetFlag(p, XMLMiscWFErrors) &&
1295 buflen >= next + 2 &&
1296 buf[next] ==
']' && buf[next+1] ==
'>')
1297 return error(p,
"Illegal character sequence ']]>' in pcdata");
1307 p->pbuf[p->pbufnext++] = 0;
1308 p->xbit.type = XBIT_pcdata;
1309 p->xbit.pcdata_chars = p->pbuf;
1317 static int parse_comment(Parser p,
int skip)
1319 InputSource s = p->source;
1326 while((c =
get(s)) != XEOE)
1329 if(c1 ==
'-' && c2 ==
'-')
1334 return error(p,
"-- in comment");
1341 require(transcribe(p, count, count));
1349 return error(p,
"EOE in comment");
1354 require(transcribe(p, count, count-3));
1355 p->pbuf[p->pbufnext++] = 0;
1356 p->xbit.type = XBIT_comment;
1357 p->xbit.comment_chars = p->pbuf;
1363 static int parse_pi(Parser p)
1365 InputSource s = p->source;
1368 Char xml[] = {
'x',
'm',
'l', 0};
1370 require(parse_name(p,
"after <?"));
1371 CopyName(p->xbit.pi_name);
1375 if(Strcasecmp(p->xbit.pi_name, xml) == 0)
1377 if(ParserGetFlag(p, XMLStrictWFErrors))
1378 return error(p,
"Misplaced or wrong-case xml declaration");
1380 warn(p,
"Misplaced or wrong-case xml declaration; treating as PI");
1385 if(looking_at(p, ParserGetFlag(p, XMLPiEnd) ?
"?>" :
">"))
1387 ExpandBuf(p->pbuf, 0);
1394 if(c == XEOE || !is_xml_whitespace(c))
1395 return error(p,
"Expected whitespace after PI name");
1398 while((c =
get(s)) != XEOE)
1402 (!ParserGetFlag(p, XMLPiEnd) || c1 ==
'?'))
1406 require(transcribe(p, count, count));
1413 return error(p,
"EOE in PI");
1415 require(transcribe(p, count, count-(ParserGetFlag(p, XMLPiEnd) ? 2 : 1)));
1417 p->pbuf[p->pbufnext++] = 0;
1418 p->xbit.type = XBIT_pi;
1419 p->xbit.pi_chars = p->pbuf;
1425 static int parse_string(Parser p,
const char8 *where,
enum literal_type type)
1429 InputSource start_source, s;
1431 s = start_source = p->source;
1434 if(quote !=
'\'' && quote !=
'"')
1437 return error(p,
"Expected quoted string %s, but got %s",
1438 where, escape(quote));
1450 if(type == LT_plain || type == LT_entity ||
1451 !ParserGetFlag(p, NormaliseAttributeValues))
1458 require(transcribe(p, count+1, count));
1461 ExpandBuf(p->pbuf, p->pbufnext+1);
1462 p->pbuf[p->pbufnext++] =
' ';
1466 if((type == LT_tok_attr || type == LT_cdata_attr) &&
1467 ParserGetFlag(p, XMLMiscWFErrors))
1468 return error(p,
"Illegal character '<' %s", where);
1473 if(s == start_source)
1475 return error(p,
"Quoted string goes past entity end");
1479 require(transcribe(p, count, count));
1487 if(type != LT_entity)
1494 require(transcribe(p, count+1, count));
1497 if(p->external_pe_depth == 0)
1500 return error(p,
"PE ref not allowed here in internal subset");
1502 require(parse_reference(p, 1, 1, 1));
1507 if(ParserGetFlag(p, IgnoreEntities))
1509 if(type == LT_plain)
1517 require(transcribe(p, count+1, count));
1520 if(looking_at(p,
"#"))
1521 require(parse_character_reference(p,
1522 ParserGetFlag(p, ExpandCharacterEntities)));
1525 require(parse_reference(p, 0,
1526 type != LT_entity &&
1527 ParserGetFlag(p, ExpandGeneralEntities),
1528 !ParserGetFlag(p, XMLMiscWFErrors)));
1535 if(c == quote && p->source == start_source)
1540 if(at_eol(s) && count > 0)
1542 require(transcribe(p, count, count));
1549 require(transcribe(p, count+1, count));
1551 ExpandBuf(p->pbuf, p->pbufnext+1);
1552 p->pbuf[p->pbufnext++] = 0;
1554 if(ParserGetFlag(p, NormaliseAttributeValues) && type == LT_tok_attr)
1558 new = old = p->pbuf;
1572 if(type == LT_tok_attr &&
new[-1] ==
' ')
1583 if(
new > p->pbuf &&
new[-1] ==
' ')
1592 static int parse_dtd(Parser p)
1594 InputSource s = p->source;
1595 Entity parent = s->entity;
1596 Entity internal_part = 0, external_part = 0;
1598 char8 *publicid = 0, *systemid = 0;
1602 xbit.type = XBIT_dtd;
1604 require(parse_name(p,
"for name in dtd"));
1606 maybe_uppercase(p, name);
1610 require(parse_external_id(p, 0, &publicid, &systemid,
1611 ParserGetFlag(p, XMLExternalIDs),
1612 ParserGetFlag(p, XMLExternalIDs)));
1614 if(systemid || publicid)
1616 external_part = NewExternalEntity(0, publicid, systemid, 0, parent);
1620 return error(p,
"System error");
1625 if(looking_at(p,
"["))
1627 int line = s->line_number, cpos = s->next;
1629 require(read_markupdecls(p));
1631 internal_part = NewInternalEntity(0, p->pbuf, parent, line, cpos, 1);
1636 FreeEntity(external_part);
1637 return error(p,
"System error");
1641 require(expect(p,
'>',
"at end of dtd"));
1643 if(p->state == PS_prolog1)
1644 p->state = PS_prolog2;
1648 FreeEntity(external_part);
1649 FreeEntity(internal_part);
1651 if(ParserGetFlag(p, XMLStrictWFErrors))
1652 return error(p,
"Misplaced or repeated DOCTYPE declaration");
1654 warn(p,
"Misplaced or repeated DOCTYPE declaration");
1662 FreeEntity(external_part);
1663 FreeEntity(internal_part);
1669 p->dtd->name = name;
1670 p->dtd->internal_part = internal_part;
1671 p->dtd->external_part = external_part;
1673 if(ParserGetFlag(p, TrustSDD))
1677 ParseDtd(p, internal_part);
1678 if(p->xbit.type == XBIT_error)
1681 if(external_part && p->standalone != SDD_yes)
1683 ParseDtd(p, external_part);
1684 if(p->xbit.type == XBIT_error)
1693 static int read_markupdecls(Parser p)
1695 InputSource s = p->source;
1697 int c, d, hyphens=0;
1706 return error(p,
"EOE in DTD");
1720 require(transcribe(p, count+1, count));
1721 p->pbuf[p->pbufnext++] = 0;
1732 while((d =
get(s)) != XEOE)
1737 require(transcribe(p, count, count));
1744 return error(p,
"EOE in DTD");
1751 while((d =
get(s)) != XEOE)
1756 require(transcribe(p, count, count));
1767 return error(p,
"EOE in DTD");
1775 if(at_eol(s) && count > 0)
1777 require(transcribe(p, count, count));
1783 static int process_nsl_decl(Parser p)
1785 InputSource s = p->source;
1788 s->entity->ml_decl = ML_nsl;
1791 if(s->entity->encoding == CE_UTF_8)
1792 s->entity->encoding = CE_unspecified_ascii_superset;
1796 if(!looking_at(p,
"DDB "))
1797 return error(p,
"Expected \"DDB\" in NSL declaration");
1799 while(c =
get(s), !is_xml_whitespace(c))
1803 return error(p,
"EOE in NSL declaration");
1806 return error(p,
"Syntax error in NSL declaration");
1813 require(transcribe(p, count+1, count));
1814 p->pbuf[p->pbufnext++] = 0;
1817 if(!looking_at(p,
"0>"))
1818 return error(p,
"Expected \"0>\" at end of NSL declaration");
1820 if(!(s->entity->ddb_filename = strdup8(Chartochar8(p->pbuf))))
1821 return error(p,
"System error");
1826 static int process_xml_decl(Parser p)
1828 InputSource s = p->source;
1829 enum {None, V, E, S} which, last = None;
1832 CharacterEncoding enc = CE_unknown;
1835 s->entity->ml_decl = ML_xml;
1839 while(!looking_at(p,
"?>"))
1841 if(looking_at(p,
"version"))
1843 else if(looking_at(p,
"encoding"))
1845 else if(looking_at(p,
"standalone"))
1848 return error(p,
"Expected \"version\", \"encoding\" or "
1849 "\"standalone\" in XML declaration");
1853 if(ParserGetFlag(p, XMLStrictWFErrors))
1854 return error(p,
"Repeated or misordered attributes "
1855 "in XML declaration");
1856 warn(p,
"Repeated or misordered attributes in XML declaration");
1861 require(expect(p,
'=',
"after attribute name in XML declaration"));
1864 require(parse_string(p,
"for attribute value in XML declaration",
1867 maybe_uppercase(p, p->pbuf);
1872 if(!is_ascii_alpha(Value[0]))
1873 return error(p,
"Encoding name does not begin with letter");
1874 for(cp=Value+1; *cp; cp++)
1875 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1876 *cp !=
'.' && *cp !=
'_' && *cp !=
'-')
1877 return error(p,
"Illegal character %s in encoding name",
1880 value = Chartochar8(Value);
1882 enc = FindEncoding(value);
1883 if(enc == CE_unknown)
1884 return error(p,
"Unknown declared encoding %s", value);
1886 if(EncodingsCompatible(p->source->entity->encoding, enc, &enc))
1892 p->source->entity->encoding = enc;
1896 return error(p,
"Declared encoding %s is incompatible with %s "
1897 "which was used to read it",
1898 CharacterEncodingName[enc],
1899 CharacterEncodingName[p->source->entity->encoding]);
1901 s->entity->encoding_decl = enc;
1906 value = Chartochar8(Value);
1908 if(str_maybecase_cmp8(p, value,
"no") == 0)
1909 p->standalone = SDD_no;
1910 else if(str_maybecase_cmp8(p, value,
"yes") == 0)
1911 p->standalone = SDD_yes;
1913 return error(p,
"Expected \"yes\" or \"no\" "
1914 "for standalone in XML declaration");
1916 s->entity->standalone_decl = p->standalone;
1921 for(cp=Value; *cp; cp++)
1922 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
1923 *cp !=
'.' && *cp !=
'_' && *cp !=
'-' && *cp !=
':')
1924 return error(p,
"Illegal character %s in version number",
1927 if(!s->entity->version_decl)
1928 if(!(s->entity->version_decl = strdup8(Chartochar8(Value))))
1929 return error(p,
"System error");
1935 else if(!is_xml_whitespace(c))
1936 return error(p,
"Expected whitespace or \"?>\" after attribute "
1937 "in XML declaration");
1943 static int parse_cdata(Parser p)
1945 InputSource s = p->source;
1949 if(p->state <= PS_prolog2)
1950 return error(p,
"Cdata section not allowed in prolog");
1951 if(p->state == PS_epilog)
1952 return error(p,
"Cdata section not allowed after body");
1956 while((c =
get(s)) != XEOE)
1959 if(c ==
'>' && c1 ==
']' && c2 ==
']')
1963 require(transcribe(p, count, count));
1970 return error(p,
"EOE in CData section");
1972 require(transcribe(p, count, count-3));
1973 p->pbuf[p->pbufnext++] = 0;
1974 p->xbit.type = XBIT_cdsect;
1975 p->xbit.cdsect_chars = p->pbuf;
1981 XBit ParseDtd(Parser p, Entity e)
1983 InputSource source, save;
1985 if(e->type == ET_external && p->entity_opener)
1986 source = p->entity_opener(e, p->callback_arg);
1988 source = EntityOpen(e);
1991 error(p,
"Couldn't open dtd entity %s", EntityDescription(e));
1997 if(ParserPush(p, source) == -1)
2002 p->external_pe_depth = (source->entity->type == ET_external);
2004 while(parse_markupdecl(p) == 0)
2007 p->external_pe_depth = 0;
2010 if(p->xbit.type != XBIT_error)
2022 static int parse_markupdecl(Parser p)
2026 int cur_line, cur_char;
2029 if(p->state == PS_error)
2030 return error(p,
"Attempt to continue reading DTD after error");
2032 clear_xbit(&p->xbit);
2034 require(skip_dtd_whitespace(p, 1));
2036 SourcePosition(s, &p->xbit.entity, &p->xbit.byte_offset);
2038 cur_ent = s->entity;
2039 cur_line = s->line_number;
2046 p->xbit.type = XBIT_none;
2049 if(looking_at(p,
"!ELEMENT"))
2051 require(expect_dtd_whitespace(p,
"after ELEMENT"));
2052 return parse_element_decl(p);
2054 else if(looking_at(p,
"!ATTLIST"))
2056 require(expect_dtd_whitespace(p,
"after ATTLIST"));
2057 return parse_attlist_decl(p);
2059 else if(looking_at(p,
"!ENTITY"))
2061 require(expect_dtd_whitespace(p,
"after ENTITY"));
2062 return parse_entity_decl(p, cur_ent, cur_line, cur_char);
2064 else if(looking_at(p,
"!NOTATION"))
2066 require(expect_dtd_whitespace(p,
"after NOTATION"));
2067 return parse_notation_decl(p);
2069 else if(looking_at(p,
"!["))
2070 return parse_conditional(p);
2071 else if(looking_at(p,
"?"))
2073 require(parse_pi(p));
2075 p->dtd_callback(&p->xbit, p->callback_arg);
2080 else if(looking_at(p,
"!--"))
2082 if(ParserGetFlag(p, ReturnComments))
2084 require(parse_comment(p, 0));
2086 p->dtd_callback(&p->xbit, p->callback_arg);
2092 return parse_comment(p, 1);
2095 return error(p,
"Syntax error after < in dtd");
2098 return error(p,
"Expected \"<\" in dtd, but got %s", escape(c));
/*
 * parse_reference: handle a general or parameter entity reference.
 *
 *   p               parser state
 *   pe              non-zero for a parameter entity reference; selects the
 *                   wording of messages and is passed to FindEntityN
 *   expand          NOTE(review): its test is not visible in this listing;
 *                   presumably chooses between expanding the reference and
 *                   transcribing its text unchanged
 *   allow_external  when zero, a reference to an ET_external entity is an
 *                   error
 *
 * The name and the terminating ';' are parsed, then the entity is looked up
 * in p->dtd.  For an undefined entity an error is raised when pe is set or
 * the ErrorOnUndefinedEntities flag is on; a warning with the same text is
 * also issued (presumably on the other branch), and a replacement internal
 * entity whose text begins with "&#38;" is created and defined in the DTD.
 *
 * The entity is then checked against allow_external and against the chain of
 * open sources (p->source and its parents) to reject recursion, opened
 * (through p->entity_opener for external entities when a callback is
 * installed) and pushed with ParserPush.
 */
2102 static int parse_reference(Parser p,
int pe,
int expand,
int allow_external)
2107 require(parse_name(p, pe ?
"for parameter entity" :
"for entity"));
2108 require(expect(p,
';',
"after entity name"));
/* 1 + namelen + 1: presumably the introducer character, the name and ';'. */
2111 return transcribe(p, 1 + p->namelen + 1, 1 + p->namelen + 1);
2113 e = FindEntityN(p->dtd, p->name, p->namelen, pe);
/* Messages print at most the first 50 characters of the name. */
2120 if(pe || ParserGetFlag(p, ErrorOnUndefinedEntities))
2121 return error(p,
"Undefined%s entity %.*S",
2122 pe ?
" parameter" :
"" ,
2123 p->namelen > 50 ? 50 : p->namelen, p->name);
2125 warn(p,
"Undefined%s entity %.*S",
2126 pe ?
" parameter" :
"",
2127 p->namelen > 50 ? 50 : p->namelen, p->name);
/*
 * Build replacement text for the undefined entity: the five characters
 * "&#38;" followed by a loop over the name's characters.  The buffer holds
 * 5 + namelen + 2 Chars.
 */
2131 buf = Malloc((5 + p->namelen + 1 + 1) *
sizeof(Char));
2133 return error(p,
"System error");
2135 *q++ =
'&'; *q++ =
'#'; *q++ =
'3'; *q++ =
'8'; *q++ =
';';
2136 for(i=0; i<p->namelen; i++)
2141 if(!(e = NewInternalEntityN(p->name, p->namelen, buf, 0, 0, 0, 0)))
2142 return error(p,
"System error");
2143 if(!DefineEntity(p->dtd, e, 0))
2144 return error(p,
"System error");
2147 if(!allow_external && e->type == ET_external)
2148 return error(p,
"Illegal reference to external entity");
/* Walk the stack of open sources to detect a reference to an open entity. */
2150 for(s = p->source; s; s = s->parent)
2152 return error(p,
"Recursive reference to entity \"%S\"", e->name);
2154 if(e->type == ET_external && p->entity_opener)
2155 s = p->entity_opener(e, p->callback_arg);
2159 return error(p,
"Couldn't open entity %S, %s",
2160 e->name, EntityDescription(e));
2162 require(ParserPush(p, s));
/*
 * parse_character_reference: handle a numeric character reference.
 *
 *   p       parser state
 *   expand  NOTE(review): its test is not visible in this listing; the
 *           transcribe() call below is presumably the non-expanding path
 *
 * A leading "x" is looked for (hexadecimal form; the later code tests
 * base == 16).  Characters are read up to ';' and must be decimal digits,
 * or additionally A-F / a-f when base is 16; anything else produces the
 * "Illegal character ... in base-%d character reference" message.
 *
 * The digits are accumulated into the unsigned value `code`.  Two validity
 * checks are visible: one rejecting code > 255 or !is_xml_legal(code)
 * ("8-bit" wording) and one rejecting only !is_xml_legal(code) ("UTF-16"
 * wording); presumably they belong to alternative build configurations.
 * Each either raises an error (ErrorOnBadCharacterEntities set) or warns that
 * the reference is ignored.
 *
 * The result is appended to p->pbuf, either as two 16-bit units based at
 * 0xd800 and 0xdc00 or as a single unit.
 */
2167 static int parse_character_reference(Parser p,
int expand)
2169 InputSource s = p->source;
2172 unsigned int code = 0;
/* Start of the reference text within the current line of the source. */
2173 Char *ch = s->line + s->next;
2175 if(looking_at(p,
"x"))
2181 while((c =
get(s)) !=
';')
2183 if((c >=
'0' && c <=
'9') ||
2184 (base == 16 && ((c >=
'A' && c <=
'F') ||
2185 (c >=
'a' && c <=
'f'))))
2191 "Illegal character %s in base-%d character reference",
/* 2 + optional 1 + count + 1: presumably "&#", an "x", the digits and ';'. */
2197 return transcribe(p, 2 + (base == 16) + count + 1,
2198 2 + (base == 16) + count + 1);
/* Accumulate the numeric value one digit at a time in the chosen base. */
2203 if(c >=
'0' && c <=
'9')
2204 code = code * base + (c -
'0');
2205 else if(c >=
'A' && c <=
'F')
2206 code = code * base + 10 + (c -
'A');
2208 code = code * base + 10 + (c -
'a');
2212 if(code > 255 || !is_xml_legal(code))
2214 if(ParserGetFlag(p, ErrorOnBadCharacterEntities))
2215 return error(p,
"0x%x is not a valid 8-bit XML character", code);
2217 warn(p,
"0x%x is not a valid 8-bit XML character; ignored", code);
2221 if(!is_xml_legal(code))
2223 if(ParserGetFlag(p, ErrorOnBadCharacterEntities))
2224 return error(p,
"0x%x is not a valid UTF-16 XML character", code);
2226 warn(p,
"0x%x is not a valid UTF-16 XML character; ignored", code);
/* Two output units: high part based at 0xd800, low 10 bits based at 0xdc00. */
2234 ExpandBuf(p->pbuf, p->pbufnext+2);
2237 p->pbuf[p->pbufnext++] = (code >> 10) + 0xd800;
2238 p->pbuf[p->pbufnext++] = (code & 0x3ff) + 0xdc00;
2244 ExpandBuf(p->pbuf, p->pbufnext+1);
2245 p->pbuf[p->pbufnext++] = code;
/*
 * parse_element_decl: parse the remainder of an element declaration (the
 * caller has already consumed the ELEMENT keyword and following whitespace).
 *
 * Reads the element name (upper-cased by maybe_uppercase when applicable),
 * then the content specification: the keyword EMPTY, the keyword ANY, or a
 * parenthesised content model.  A content model is parsed with parse_cp,
 * validated with check_content_decl and turned into a string with
 * stringify_cp; the particle tree is released with FreeContentParticle.  A
 * choice whose first child is #PCDATA is singled out (presumably to mark the
 * element as mixed content).
 *
 * The listing also contains a character-by-character scan up to '>' that
 * transcribes the text into p->pbuf, expands parameter entity references
 * (rejected in the internal subset) and searches the result for "#PCDATA";
 * NOTE(review): this looks like an alternative implementation of the content
 * model handling -- the selecting condition is not visible here.
 *
 * Finally '>' is required.  If the element already exists it is either
 * redefined with RedefineElement or the redeclaration is ignored (with a
 * warning when WarnOnRedefinitions is set); otherwise it is created with
 * DefineElement.
 */
2252 static int parse_element_decl(Parser p)
2256 ElementDefinition def;
/* The literal "#PCDATA" as a Char string, for searching with Strstr. */
2261 Char pcdata[] = {
'#',
'P',
'C',
'D',
'A',
'T',
'A',0};
2265 require(parse_name(p,
"for name in element declaration"));
2267 maybe_uppercase(p, name);
2269 require(expect_dtd_whitespace(p,
"after name in element declaration"));
2271 if(looking_at(p,
"EMPTY"))
2276 else if(looking_at(p,
"ANY"))
2283 if(looking_at(p,
"("))
/* Parse, validate and stringify the content model; free it on any failure. */
2286 if(!(cp = parse_cp(p)) ||
2287 check_content_decl(p, cp) < 0 ||
2288 !(content = stringify_cp(cp)))
2290 FreeContentParticle(cp);
2296 if(cp->type == CP_choice && cp->children[0]->type == CP_pcdata)
2302 FreeContentParticle(cp);
2307 return error(p,
"Expected \"EMPTY\", \"ANY\", or \"(\" after name in "
2308 "element declaration");
2318 while((c =
get(p->source)) !=
'>')
2324 require(transcribe(p, count, count));
/* Running out of input with no parent source means the declaration is cut. */
2325 if(!p->source->parent)
2326 return error(p,
"EOE in element declaration");
2332 require(transcribe(p, count+1, count));
2333 if(p->external_pe_depth == 0)
2337 "PE ref not allowed here in internal subset");
2339 require(parse_reference(p, 1, 1, 1));
2344 if(at_eol(p->source))
2346 require(transcribe(p, count, count));
2353 require(transcribe(p, count, count));
2354 p->pbuf[p->pbufnext++] = 0;
2356 if(Strstr(p->pbuf, pcdata))
2365 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2366 require(expect(p,
'>',
"at end of element declaration"));
2368 if((def = FindElement(p->dtd, name)))
2371 RedefineElement(def, type, content);
2375 if(ParserGetFlag(p, WarnOnRedefinitions))
2376 warn(p,
"Ignoring redeclaration of element %S", name);
2380 if (!DefineElement(p->dtd, name, type, content)) {
2381 return error(p,
"System error");
/*
 * parse_cp: parse one content particle of an element content model.
 *
 * Three forms are recognised:
 *   "("        a nested group, delegated to parse_choice_or_seq
 *   "#PCDATA"  a newly allocated particle of type CP_pcdata
 *   a name     a newly allocated particle whose name is set by CopyName0
 * After the particle, an optional "*", "+" or "?" is stored in
 * cp->repetition.
 *
 * Allocation failure is reported with error(p, "System error").  Callers in
 * this file treat a null result as failure.
 */
2391 static ContentParticle parse_cp(Parser p)
2395 if(looking_at(p,
"("))
2397 if(!(cp = parse_choice_or_seq(p)))
2400 else if(looking_at(p,
"#PCDATA"))
2402 if(!(cp = Malloc(
sizeof(*cp))))
2404 error(p,
"System error");
2408 cp->type = CP_pcdata;
2412 if(parse_name(p,
"in content declaration") < 0)
2415 if(!(cp = Malloc(
sizeof(*cp))))
2417 error(p,
"System error");
2422 CopyName0(cp->name);
/* Optional repetition suffix. */
2425 if(looking_at(p,
"*"))
2426 cp->repetition =
'*';
2427 else if(looking_at(p,
"+"))
2428 cp->repetition =
'+';
2429 else if(looking_at(p,
"?"))
2430 cp->repetition =
'?';
/*
 * parse_choice_or_seq: parse a parenthesised choice or sequence group (the
 * "(" has already been consumed by the caller, parse_cp).
 *
 * The first child is parsed here with parse_cp; the remaining children and
 * the group particle itself are produced by parse_choice_or_seq_1(p, 1, 0).
 * The first child is then stored in cp->children[0].  If building the rest
 * of the group fails, the first child is released with FreeContentParticle.
 * Whitespace is skipped before and after the child, allowing parameter
 * entity references only when p->external_pe_depth > 0.
 */
2439 static ContentParticle parse_choice_or_seq(Parser p)
2441 ContentParticle cp, cp1;
2444 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2446 if(!(cp1 = parse_cp(p)))
2449 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2451 if(!(cp = parse_choice_or_seq_1(p, 1, 0)))
2452 FreeContentParticle(cp1);
2454 cp->children[0] = cp1;
/*
 * parse_choice_or_seq_1: recursive helper that parses the tail of a choice
 * or sequence group.
 *
 *   p          parser state
 *   nchildren  number of children already parsed by the callers
 *   sep        separator used so far (',' or '|'), or 0 if none seen yet
 *
 * The next character is read into nsep.  One branch allocates the group
 * particle together with an array of nchildren child slots and sets its type
 * to CP_seq when sep is ',' and CP_choice otherwise (presumably this is the
 * branch taken on ')'; the test itself is not visible in this listing).
 * Otherwise nsep must be '|' or ',' and must agree with sep when sep is set;
 * the next child is parsed and the function recurses with nchildren+1, after
 * which the child is stored at index nchildren.  The array is therefore
 * allocated once, at the deepest recursion level, when the total number of
 * children is known, and filled in as the recursion unwinds.
 */
2461 static ContentParticle parse_choice_or_seq_1(Parser p,
int nchildren,
char sep)
2463 ContentParticle cp = 0, cp1;
2464 int nsep =
get(p->source);
/* sizeof(cp) is the size of one ContentParticle value, i.e. one child slot. */
2470 if(!(cp = Malloc(
sizeof(*cp))) ||
2471 !(cp->children = Malloc(nchildren *
sizeof(cp))))
2474 error(p,
"System error");
2482 cp->type = sep ==
',' ? CP_seq : CP_choice;
2483 cp->nchildren = nchildren;
2488 if(nsep !=
'|' && nsep !=
',')
2490 error(p,
"Expected | or , or ) in content declaration, got %s",
/* A group may not mix the two separators. */
2495 if(sep && nsep != sep)
2497 error(p,
"Content particle contains both | and ,");
2501 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2503 if(!(cp1 = parse_cp(p)))
2506 require0(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2508 if(!(cp = parse_choice_or_seq_1(p, nchildren+1, (
char)nsep)))
2509 FreeContentParticle(cp1);
2511 cp->children[nchildren] = cp1;
/*
 * check_content_decl: validate a parsed content model.
 *
 * A choice whose first child is #PCDATA is treated as a mixed content
 * declaration: every other child must be a plain name (CP_name), and the
 * group's repetition must be '*' unless it has exactly one child and no
 * repetition.  Violations are reported as "Invalid mixed content
 * declaration".  The listing also shows delegation to check_content_decl_1
 * (presumably for every other kind of model).
 *
 * The caller in this file treats a result < 0 as failure.
 */
2518 static int check_content_decl(Parser p, ContentParticle cp)
2522 if(cp->type == CP_choice && cp->children[0]->type == CP_pcdata)
2524 for(i=1; i<cp->nchildren; i++)
2525 if(cp->children[i]->type != CP_name)
2526 return error(p,
"Invalid mixed content declaration");
2528 if(cp->repetition !=
'*' &&
2529 !(cp->nchildren == 1 && cp->repetition == 0))
2530 return error(p,
"Invalid mixed content declaration");
2535 return check_content_decl_1(p, cp);
/*
 * check_content_decl_1: recursive part of content model validation.
 *
 * Reports "Misplaced #PCDATA in content declaration" (the condition is not
 * visible in this listing; presumably when a CP_pcdata particle is found)
 * and checks every child of cp recursively, stopping at the first child
 * whose check returns a negative value.
 */
2538 static int check_content_decl_1(Parser p, ContentParticle cp)
2545 return error(p,
"Misplaced #PCDATA in content declaration");
2548 for(i=0; i<cp->nchildren; i++)
2549 if(check_content_decl_1(p, cp->children[i]) < 0)
/*
 * stringify_cp: produce a newly allocated Char string for a content
 * particle.
 *
 * The required length is computed with size_cp, a buffer of size+1 Chars is
 * allocated, and a FILE16 opened for writing ("w") is created over that
 * buffer.  NOTE(review): the lines that write into the stream and return the
 * buffer are not visible here; presumably print_cp is used.  The caller in
 * this file treats a null result as failure.
 */
2559 static Char *stringify_cp(ContentParticle cp)
2561 int size = size_cp(cp);
2565 if(!(s = Malloc((size+1) *
sizeof(Char))) ||
2566 !(f = MakeFILE16FromString(s, (size + 1) *
sizeof(Char),
"w")))
/*
 * print_cp: write the textual form of content particle cp to stream f.
 *
 * Emits "#PCDATA", or the particle's name (with %S), or the children in
 * order, each printed recursively and separated by "," for a CP_seq group
 * and "|" otherwise.  The repetition character is written with %c.
 * NOTE(review): the switch/conditions selecting each output, and any
 * parentheses printed around groups, are not visible in this listing.
 */
2580 static void print_cp(ContentParticle cp, FILE16 *f)
2587 Fprintf(f,
"#PCDATA");
2590 Fprintf(f,
"%S", cp->name);
2595 for(i=0; i<cp->nchildren; i++)
2598 Fprintf(f, cp->type == CP_seq ?
"," :
"|");
2599 print_cp(cp->children[i], f);
2606 Fprintf(f,
"%c", cp->repetition);
/*
 * size_cp: compute a length for content particle cp; stringify_cp uses the
 * result to size its output buffer.
 *
 * The visible code takes Strlen(cp->name) for a name and adds the sizes of
 * all children recursively.  NOTE(review): the contributions for #PCDATA,
 * separators, parentheses and the repetition character are not visible in
 * this listing.
 */
2609 static int size_cp(ContentParticle cp)
2619 s = Strlen(cp->name);
2623 for(i=0; i<cp->nchildren; i++)
2627 s += size_cp(cp->children[i]);
/*
 * FreeContentParticle: release a content particle tree.
 *
 * Every child of cp is released recursively.  NOTE(review): the handling of
 * a null cp, of cp->name, of the children array and of cp itself is not
 * visible in this listing.
 */
2638 void FreeContentParticle(ContentParticle cp)
2654 for(i=0; i<cp->nchildren; i++)
2655 FreeContentParticle(cp->children[i]);
/*
 * parse_attlist_decl: parse the remainder of an attribute list declaration
 * (the caller has already consumed the ATTLIST keyword and whitespace).
 *
 * The element name is read and looked up with FindElement; an unknown
 * element is created with TentativelyDefineElement.  Then, until ">" is
 * seen, one attribute definition is read per iteration:
 *
 *   - the attribute name;
 *   - its type keyword (CDATA, IDREFS, IDREF, ID, ENTITIES, ENTITY,
 *     NMTOKENS, NMTOKEN, NOTATION), with AT_enumeration also assigned in the
 *     listing (presumably when no keyword matches);
 *   - for notation and enumeration types, a parenthesised list of values
 *     separated by "|", stored in p->pbuf as consecutive 0-terminated
 *     strings and indexed by a 0-terminated array `allowed_values`;
 *   - the default: #REQUIRED, #IMPLIED, #FIXED, with DT_none also assigned
 *     (presumably when none matches); for DT_fixed and DT_none a default
 *     value string is parsed.
 *
 * An attribute that already exists on the element is ignored (with a
 * warning when WarnOnRedefinitions is set) and the freshly built values are
 * freed; otherwise it is registered with DefineAttribute.
 */
2665 static int parse_attlist_decl(Parser p)
2668 ElementDefinition element;
2670 DefaultType default_type;
2671 Char **allowed_values, *t;
2672 Char *default_value;
2675 require(parse_name(p,
"for name in attlist declaration"));
2677 maybe_uppercase(p, name);
2679 if(!(element = FindElement(p->dtd, name)))
2681 if(!(element = TentativelyDefineElement(p->dtd, name)))
2682 return error(p,
"System error");
2686 require(expect_dtd_whitespace(p,
2687 "after element name in attlist declaration"));
2689 while(!looking_at(p,
">"))
2691 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2692 require(parse_name(p,
"for attribute in attlist declaration"));
2694 maybe_uppercase(p, name);
2696 require(expect_dtd_whitespace(p,
"after name in attlist declaration"));
/*
 * Longer keywords are tested before their prefixes (IDREFS, IDREF, ID, ...);
 * presumably required because looking_at matches a leading substring.
 */
2698 if(looking_at(p,
"CDATA"))
2700 else if(looking_at(p,
"IDREFS"))
2702 else if(looking_at(p,
"IDREF"))
2704 else if(looking_at(p,
"ID"))
2706 else if(looking_at(p,
"ENTITIES"))
2708 else if(looking_at(p,
"ENTITY"))
2710 else if(looking_at(p,
"NMTOKENS"))
2712 else if(looking_at(p,
"NMTOKEN"))
2714 else if(looking_at(p,
"NOTATION"))
2717 type = AT_enumeration;
/* An enumeration has no keyword, so no whitespace is required before "(". */
2719 if(type != AT_enumeration)
2721 require(expect_dtd_whitespace(p,
"after attribute type"));
2724 if(type == AT_notation || type == AT_enumeration)
2726 require(expect(p,
'(',
2727 "or keyword for type in attlist declaration"));
2733 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2734 if(type == AT_notation)
2735 require(parse_name(p,
2736 "for notation value in attlist declaration"));
2738 require(parse_nmtoken(p,
2739 "for enumerated value in attlist declaration"));
2740 maybe_uppercase_name(p);
/* Append the value and a 0 terminator to p->pbuf. */
2741 ExpandBuf(p->pbuf, p->pbufnext + p->namelen + 1);
2742 memcpy(p->pbuf+p->pbufnext,
2744 p->namelen *
sizeof(Char));
2745 p->pbuf[p->pbufnext + p->namelen] = 0;
2746 p->pbufnext += (p->namelen + 1);
2748 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2750 while(looking_at(p,
"|"));
2752 require(expect(p,
')',
2753 "at end of enumerated value list in attlist declaration"));
2754 require(expect_dtd_whitespace(p,
"after enumerated value list "
2755 "in attlist declaration"));
/* One pointer per value plus a terminating 0 entry. */
2757 allowed_values = Malloc((nvalues+1)*
sizeof(Char *));
2759 return error(p,
"System error");
2760 for(i=0, t=p->pbuf; i<nvalues; i++)
2762 allowed_values[i] = t;
2766 allowed_values[nvalues] = 0;
2773 if(looking_at(p,
"#REQUIRED"))
2774 default_type = DT_required;
2775 else if(looking_at(p,
"#IMPLIED"))
2776 default_type = DT_implied;
2777 else if(looking_at(p,
"#FIXED"))
2779 default_type = DT_fixed;
2780 require(expect_dtd_whitespace(p,
"after #FIXED"));
2783 default_type = DT_none;
2785 if(default_type == DT_fixed || default_type == DT_none)
2787 require(parse_string(p,
2788 "for default value in attlist declaration",
2789 type == AT_cdata ? LT_cdata_attr :
2791 default_value = p->pbuf;
/* Default values of CDATA, ENTITY and ENTITIES attributes keep their case. */
2793 if(type != AT_cdata && type != AT_entity && type != AT_entities)
2794 maybe_uppercase(p, default_value);
2799 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2801 if(FindAttribute(element, name))
2803 if(ParserGetFlag(p, WarnOnRedefinitions))
2804 warn(p,
"Ignoring redeclaration of attribute %S", name);
/*
 * allowed_values[0] was set from the start of the value buffer above, so
 * freeing it presumably releases the storage of all the value strings.
 */
2807 Free(allowed_values[0]);
2808 Free(allowed_values);
2811 Free(default_value);
2814 if(!DefineAttribute(element, name, type, allowed_values,
2815 default_type, default_value))
2816 return error(p,
"System error");
/*
 * parse_external_id: parse a SYSTEM or PUBLIC external identifier.
 *
 *   p         parser state
 *   required  NOTE(review): its use is not visible in this listing
 *   publicid  receives a strdup8 copy of the public identifier
 *   systemid  receives a strdup8 copy of the system identifier
 *   (two further int parameters are passed by the callers in this file but
 *   their names and uses are not visible here)
 *
 * After "SYSTEM" a quoted system ID is read.  After "PUBLIC" a quoted public
 * ID is read and every character is checked to be an ASCII letter, an ASCII
 * digit or one of the characters in the literal list below; a system ID is
 * then read as well.  Before each string the next character is peeked with
 * get/unget to see whether a quote follows.  If neither keyword is found the
 * listing shows the "Missing or invalid external ID" error.
 *
 * A failed strdup8 is reported as "System error".
 */
2827 static int parse_external_id(Parser p,
int required,
2828 char8 **publicid, char8 **systemid,
2831 InputSource s = p->source;
2838 if(looking_at(p,
"SYSTEM"))
/* Peek at the next character without consuming it. */
2843 c =
get(s); unget(s);
2844 if(c !=
'"' && c !=
'\'')
2848 require(expect_dtd_whitespace(p,
"after SYSTEM"));
2850 require(parse_string(p,
"for system ID", LT_plain));
2851 if(!(*systemid = strdup8(Chartochar8(p->pbuf))))
2852 return error(p,
"System error");
2854 else if(looking_at(p,
"PUBLIC"))
2859 c =
get(s); unget(s);
2860 if(c !=
'"' && c !=
'\'')
2864 require(expect_dtd_whitespace(p,
"after PUBLIC"));
2866 require(parse_string(p,
"for public ID", LT_plain));
/* Reject any character outside the set permitted in a public identifier. */
2868 for(cp=p->pbuf; *cp; cp++)
2869 if(!is_ascii_alpha(*cp) && !is_ascii_digit(*cp) &&
2870 strchr8(
"-'()+,./:=?;!*#@$_% \r\n", *cp) == 0)
2871 return error(p,
"Illegal character %s in public id",
2874 if(!(*publicid = strdup8(Chartochar8(p->pbuf))))
2875 return error(p,
"System error");
2880 c =
get(s); unget(s);
2881 if(c !=
'"' && c !=
'\'')
2885 require(expect_dtd_whitespace(p,
"after public id"));
2887 require(parse_string(p,
"for system ID", LT_plain));
2888 if(!(*systemid = strdup8(Chartochar8(p->pbuf))))
2889 return error(p,
"System error");
2892 return error(p,
"Missing or invalid external ID");
/*
 * parse_entity_decl: parse the remainder of an entity declaration (the
 * caller has already consumed the ENTITY keyword and whitespace).
 *
 *   p      parser state
 *   ent    entity in which the declaration appears; passed on to the
 *          entity constructors
 *   line   line number of the declaration, passed to NewInternalEntity
 *   chpos  character position of the declaration, passed to
 *          NewInternalEntity
 *
 * A leading "%" marks a parameter entity (pe).  After the name:
 *   - a quote character introduces an internal entity whose value is read
 *     with parse_string(..., LT_entity) and built with NewInternalEntity;
 *   - otherwise an external ID is parsed, optionally followed by NDATA and a
 *     notation name.  NDATA requires preceding whitespace and is rejected
 *     for parameter entities.  The notation is looked up with FindNotationN
 *     and the listing shows TentativelyDefineNotationN (presumably used when
 *     the lookup fails).  The entity is built with NewExternalEntity.
 *
 * After the closing '>', an existing definition whose parent is not
 * xml_builtin_entity causes the new one to be ignored (with a warning when
 * WarnOnRedefinitions is set); otherwise the entity is registered with
 * DefineEntity.
 */
2899 static int parse_entity_decl(Parser p, Entity ent,
int line,
int chpos)
2905 pe = looking_at(p,
"%");
2908 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2909 require(parse_name(p,
"for name in entity declaration"));
2912 require(expect_dtd_whitespace(p,
"after name in entity declaration"));
2914 if(looking_at(p,
"'") || looking_at(p,
"\""))
2919 require(parse_string(p,
"for value in entity declaration", LT_entity));
2923 if(!(e = NewInternalEntity(name, value, ent, line, chpos, 0)))
2924 return error(p,
"System error");
2928 char8 *publicid, *systemid;
2929 NotationDefinition notation = 0;
2931 require(parse_external_id(p, 1, &publicid, &systemid, 1, 1));
/* t keeps the result of the whitespace skip for the NDATA check below. */
2933 require((t = skip_dtd_whitespace(p, p->external_pe_depth > 0)));
2934 if(looking_at(p,
"NDATA"))
2937 return error(p,
"Whitespace missing before NDATA");
2939 return error(p,
"NDATA not allowed for parameter entity");
2940 require(expect_dtd_whitespace(p,
"after NDATA"));
2941 require(parse_name(p,
"for notation name in entity declaration"));
2942 maybe_uppercase_name(p);
2943 notation = FindNotationN(p->dtd, p->name, p->namelen);
2947 TentativelyDefineNotationN(p->dtd, p->name, p->namelen);
2949 return error(p,
"System error");
2953 if(!(e = NewExternalEntity(name, publicid, systemid, notation, ent)))
2954 return error(p,
"System error");
2959 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2960 require(expect(p,
'>',
"at end of entity declaration"));
/* Definitions that come from xml_builtin_entity do not block a new one. */
2962 if((old = FindEntity(p->dtd, e->name, pe)) &&
2963 old->parent != xml_builtin_entity)
2965 if(ParserGetFlag(p, WarnOnRedefinitions))
2966 warn(p,
"Ignoring redefinition of%s entity %S",
2967 pe ?
" parameter" :
"", e->name);
2970 if(!DefineEntity(p->dtd, e, pe))
2971 return error(p,
"System error");
/*
 * parse_notation_decl: parse the remainder of a notation declaration (the
 * caller has already consumed the NOTATION keyword and whitespace).
 *
 * Reads the notation name (upper-cased by maybe_uppercase when applicable),
 * an external ID (parse_external_id with trailing arguments 1, 0) and the
 * closing '>'.  If a notation of that name already exists, the listing shows
 * both RedefineNotation(def, publicid, systemid) and a path that warns
 * ("Ignoring redefinition", when WarnOnRedefinitions is set) and frees the
 * two identifier strings; the condition choosing between them is not
 * visible here.  A new notation is registered with DefineNotation.
 */
2978 static int parse_notation_decl(Parser p)
2981 char8 *publicid, *systemid;
2982 NotationDefinition def;
2984 require(parse_name(p,
"for name in notation declaration"));
2986 maybe_uppercase(p, name);
2988 require(expect_dtd_whitespace(p,
"after name in notation declaration"));
2990 require(parse_external_id(p, 1, &publicid, &systemid, 1, 0));
2992 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
2993 require(expect(p,
'>',
"at end of notation declaration"));
2995 if((def = FindNotation(p->dtd, name)))
2998 RedefineNotation(def, publicid, systemid);
3000 if(ParserGetFlag(p, WarnOnRedefinitions))
3002 warn(p,
"Ignoring redefinition of notation %S", name);
/* The ignored declaration's identifier strings are released here. */
3003 if(publicid) Free(publicid);
3004 if(systemid) Free(systemid);
3009 if(!DefineNotation(p->dtd, name, publicid, systemid))
3010 return error(p,
"System error");
/*
 * parse_conditional: parse a conditional section (the caller has already
 * consumed the opening "![").
 *
 * Conditional sections are rejected when p->external_pe_depth is 0, i.e. in
 * the internal subset.
 *
 * INCLUDE: after '[', markup declarations are parsed with parse_markupdecl
 * until a "]" is seen, which must then be followed by "]>".  One
 * parse_markupdecl result is reported as "EOF in conditional section".
 *
 * IGNORE: after '[', characters are read one at a time.  Reaching the end of
 * the current source is an error ("EOE in ignored conditional section");
 * the test of p->source->parent suggests a parent source is handled
 * differently -- the details are not visible here.  The sequences "![" and
 * "]>" are both looked for, presumably to track nesting depth.
 *
 * Any other keyword yields "INCLUDE or IGNORE required in conditional
 * section".
 */
3018 static int parse_conditional(Parser p)
3022 if(p->external_pe_depth == 0)
3023 return error(p,
"Conditional section not allowed in internal subset");
3025 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3026 if(looking_at(p,
"INCLUDE"))
3028 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3029 require(expect(p,
'[',
"at start of conditional section"));
3030 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3031 while(!looking_at(p,
"]"))
3033 switch(parse_markupdecl(p))
3036 return error(p,
"EOF in conditional section");
3040 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
/* The first "]" was consumed by the loop test; "]>" must follow it. */
3043 if(!looking_at(p,
"]>"))
3044 return error(p,
"]> required after ] in conditional section");
3046 else if(looking_at(p,
"IGNORE"))
3050 require(skip_dtd_whitespace(p, p->external_pe_depth > 0));
3051 require(expect(p,
'[',
"at start of conditional section"));
3055 switch(
get(p->source))
3058 if(p->source->parent)
3061 return error(p,
"EOE in ignored conditional section");
3064 if(looking_at(p,
"!["))
3068 if(looking_at(p,
"]>"))
3074 return error(p,
"INCLUDE or IGNORE required in conditional section");
/*
 * maybe_uppercase: act on the Char string s only when the parser's
 * CaseInsensitive flag is set.
 * NOTE(review): the body of the conditional is not visible in this listing;
 * presumably it converts s to upper case in place, as maybe_uppercase_name
 * does for p->name.
 */
3079 static void maybe_uppercase(Parser p, Char *s)
3081 if(ParserGetFlag(p, CaseInsensitive))
/*
 * maybe_uppercase_name: when the parser's CaseInsensitive flag is set,
 * replace each of the first p->namelen characters of p->name with its
 * Toupper() value, in place.  Does nothing otherwise.
 */
3089 static void maybe_uppercase_name(Parser p)
3093 if(ParserGetFlag(p, CaseInsensitive))
3094 for(i=0; i<p->namelen; i++)
3095 p->name[i] = Toupper(p->name[i]);
/*
 * str_maybecase_cmp8: compare the 8-bit strings a and b, using strcasecmp8
 * when the parser's CaseInsensitive flag is set and strcmp8 otherwise.
 * NOTE(review): the `return` keyword preceding the expression is not visible
 * in this listing; presumably the comparison result is returned.
 */
3098 static int str_maybecase_cmp8(Parser p,
const char8 *a,
const char8 *b)
3101 ParserGetFlag(p, CaseInsensitive) ? strcasecmp8(a, b) : strcmp8(a, b);
/*
 * is_ascii_alpha: return 1 when c is an ASCII letter ('A'-'Z' or 'a'-'z'),
 * 0 for every other value.  Independent of the current locale.
 */
static int is_ascii_alpha(int c)
{
    /* Everything below 'A' is out of range. */
    if (c < 'A')
        return 0;
    if (c <= 'Z')
        return 1;
    /* The characters between 'Z' and 'a' are not letters. */
    if (c < 'a')
        return 0;
    return c <= 'z';
}
/*
 * is_ascii_digit: return 1 when c is one of the ASCII digits '0'-'9',
 * 0 for every other value.  Independent of the current locale.
 */
static int is_ascii_digit(int c)
{
    if (c < '0' || c > '9')
        return 0;
    return 1;
}
/*
 * verror: format a message and store it in `bit` as an error.
 *
 *   bit     receives type XBIT_error and a pointer to the message
 *   format  format string, passed to Vsprintf with CE_ISO_8859_1
 *   args    arguments for the format
 *
 * The message is written into a single function-static 400-byte buffer, and
 * bit->error_message points at that buffer, so each call overwrites the text
 * of the previous message.
 *
 * NOTE(review): Vsprintf is given no size limit here, so a message longer
 * than the buffer would overrun it; callers in this file limit printed names
 * to 50 characters.  Confirm all format arguments are bounded.
 */
3116 static void verror(XBit bit,
const char8 *format, va_list args)
3119 static char8 message[400];
3122 Vsprintf(message, CE_ISO_8859_1, format, args);
3125 bit->type = XBIT_error;
3126 bit->error_message = message;
/*
 * error: record a formatted error in the parser.
 *
 * The message is formatted by verror into p->xbit (which becomes an
 * XBIT_error bit) and the parser state is set to PS_error.  Callers in this
 * file return the result of error() directly as their own failure code; the
 * return statement itself is not visible in this listing.
 *
 * NOTE(review): no va_end(args) is visible after va_start -- confirm one is
 * present, as the C standard requires the pair.
 */
3129 static int error(Parser p,
const char8 *format, ...)
3133 va_start(args, format);
3134 verror(&p->xbit, format, args);
3136 p->state = PS_error;
/*
 * warn: report a formatted warning without putting the parser into the
 * error state.
 *
 * The message is formatted by verror into a function-static struct xbit,
 * whose type is then changed to XBIT_warning.  When p->warning_callback is
 * set, the bit is passed to it together with p->callback_arg; the listing
 * also shows ParserPerror(p, &bit) (presumably the fallback when no callback
 * is installed).  Because the bit is static, its contents are overwritten by
 * the next warning.
 *
 * NOTE(review): no va_end(args) is visible after va_start -- confirm one is
 * present.
 */
3141 static void warn(Parser p,
const char8 *format, ...)
3144 static struct xbit bit;
3146 va_start(args, format);
3147 verror(&bit, format, args);
/* verror marks the bit as an error; downgrade it to a warning. */
3149 bit.type = XBIT_warning;
3151 if(p->warning_callback)
3152 p->warning_callback(&bit, p->callback_arg);
3154 ParserPerror(p, &bit);