42 #include "EST_THash.h"
43 #include "EST_error.h"
45 #include "rxp/XML_Parser.h"
// File-scope regular expressions for the APML parser.
//
// simpleIDRegex: any prefix followed by "#id(w<digits>)".
// NOTE(review): this reading assumes EST_Regex treats bare parentheses as
// literal characters and backslash-escaped parentheses as a group around
// the digits -- confirm against the EST_Regex documentation.
// No use of this pattern is visible in this excerpt.
47 static EST_Regex simpleIDRegex(
".*#id(w\\([0-9]+\\))");
// rangeIDRegex: same form as simpleIDRegex but with two "id(w<digits>)"
// parts separated by arbitrary text, presumably the start and end of a
// range of word ids -- TODO confirm. No use is visible in this excerpt.
48 static EST_Regex rangeIDRegex(
".*#id(w\\([0-9]+\\)).*id(w\\([0-9]+\\))");
// RXpunc: a run of one or more punctuation characters drawn from
// full stop, comma, question mark, exclamation mark and double quote.
// NOTE(review): whether the backslashes inside the bracket expression are
// themselves members of the set depends on EST_Regex's dialect -- confirm.
// Used elsewhere in this file via matches(), index(), after() and at() to
// recognise punctuation-only strings and to separate punctuation from a
// token's text.
49 static EST_Regex RXpunc(
"[\\.,\\?\\!\"]+");
106 const char *instruction);
116 for(them.
begin(attributes); them ; them++)
118 (
const char *)them->k,
119 (
const char *)them->v);
122 EST_read_status apml_read(FILE *file,
128 (void)print_attributes;
129 Apml_Parser_Class pclass;
136 XML_Parser *parser = pclass.make_parser(file, name, &state);
140 return read_format_error;
159 Parse_State *state = (Parse_State *)data;
166 state->last_token=NULL;
169 state->perf = state->utt->create_relation(
"Perfomative");
170 state->com = state->utt->create_relation(
"Communicative");
171 state->tokens = state->utt->create_relation(
"Token");
172 state->semstruct = state->utt->create_relation(
"SemStructure");
173 state->emphasis = state->utt->create_relation(
"Emphasis");
174 state->boundary = state->utt->create_relation(
"Boundary");
175 state->pause = state->utt->create_relation(
"Pause");
184 (void)c; (void)p; (void)data;
194 (void)c; (void)p; (void)attributes;
195 Parse_State *state = (Parse_State *)data;
199 if (strcmp(name,
"turnallocation")==0)
205 if (strcmp(name,
"apml")==0)
210 if( strcmp(name,
"performative")==0
211 || strcmp(name,
"rheme")==0
212 || strcmp(name,
"theme")==0
213 || strcmp(name,
"emphasis")==0
214 || strcmp(name,
"boundary")==0
215 || strcmp(name,
"pause")==0)
223 for(them.
begin(attributes); them ; them++)
232 if( strcmp(name,
"emphasis")==0 )
234 item = state->emphasis->append();
235 state->pending = item;
237 else if(strcmp(name,
"boundary")==0 )
239 item = state->boundary->append();
240 if(state->last_token)
241 item->append_daughter(state->last_token);
243 else if(strcmp(name,
"pause")==0 )
245 item = state->pause->append();
246 if(state->last_token)
247 item->append_daughter(state->last_token);
251 if (state->parent == NULL)
252 item = state->semstruct->append();
254 item = state->parent->append_daughter();
258 item->set_contents(cont);
263 EST_warning(
"APML Parser: unknown element %s", name);
273 (void)c; (void)p; (void)attributes;
275 element_open(c, p, data, name, attributes);
276 element_close(c, p, data, name);
285 (void)c; (void)p; (void)name;
286 Parse_State *state = (Parse_State *)data;
288 if ( strcmp(name,
"emphasis")==0
289 || strcmp(name,
"boundary")==0
290 || strcmp(name,
"pause")==0 )
297 if (strcmp(name,
"performative")==0
298 || strcmp(name,
"theme")==0
299 || strcmp(name,
"rheme")==0)
302 state->pending = NULL;
303 state->parent=state->parent->up();
315 Parse_State *state = (Parse_State *)data;
318 split(chars,strings,255,RXwhite);
325 while( s < 1 || strings[s].length() > 0 )
327 if(strings[s].length() > 0 )
330 if(strings[s].matches(RXpunc))
332 state->last_token->set(
"punc",strings[s]);
340 if (state->parent == NULL)
341 item = state->semstruct->append();
343 item = state->parent->append_daughter();
344 item->set_contents(cont);
347 int i = strings[s].
index(RXpunc);
350 if( ps.
length() > 0 && i == 0)
352 cout <<
"Got pre punc: " << ps << endl;
353 intermediate = strings[s].
after(RXpunc);
355 item->
set(
"prepunctuation",ps);
359 intermediate = strings[s];
360 item->
set(
"prepunctuation",
"");
363 ps = intermediate.
at(RXpunc);
366 cout <<
"Got punc: " << ps << endl;
368 item->
set(
"punc",ps);
373 item->
set(
"punc",
"");
376 state->tokens->append(item);
377 state->last_token = item;
381 state->pending->append_daughter(item);
400 (void)c; (void)p; (void)data; (void)chars;
410 const char *instruction)
413 Parse_State *state = (Parse_State *)data;
415 printf(
"APML XML Parser [proc[%s]] %d\n", instruction, state->depth);
423 (void)c; (void)p; (void)data;
426 EST_error(
"APML Parser %s", get_error(p));