44 #include "EST_string_aux.h"
45 #include "EST_FileType.h"
46 #include "EST_Token.h"
47 #include "ling_class/EST_Utterance.h"
48 #include "EST_UtteranceFile.h"
60 static EST_write_status utt_save_all_contents(ostream &outf,
63 static EST_write_status utt_save_all_contents(ostream &outf,
67 static EST_write_status utt_save_ling_content(ostream &outf,
77 if (node->unref_relation(
"__READ__"))
97 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
99 if (t != est_file_utterance)
100 return misc_read_error;
101 if (hinfo.
ival(
"version") != 2)
103 if (hinfo.
ival(
"version") == 3)
104 EST_warning(
"Loading est utterance format version 3, ladders will not be understood");
107 EST_error(
"utt_load: %s wrong version of utterance format expected 2 (or 3) but found %d",
113 if (ts.
get() !=
"Features")
116 " missing utterance features section" << endl;
117 return misc_read_error;
122 if (ts.
get() !=
"Stream_Items")
125 " missing Items section" << endl;
126 return misc_read_error;
129 r = load_all_contents(ts, sitems, max_id);
132 if (ts.
peek() ==
"Streams")
134 cerr <<
"utt.load: streams found in utterance file, " <<
135 "no longer supported" << endl;
136 return misc_read_error;
140 if ((r == format_ok) && (ts.
get() !=
"Relations"))
143 " missing Relations section" << endl;
144 return misc_read_error;
147 r = load_relations(ts, u, sitems);
149 if ((r == format_ok) && (ts.
get() !=
"End_of_Utterance"))
152 " End_of_Utterance expected but not found" << endl;
153 return misc_read_error;
160 for(
int ni=0; ni < sitems.length(); ni++)
183 while (ts.
peek() !=
"End_of_Stream_Items")
191 Sid = ts.
get().string();
197 " Item name not a number: " << Sid << endl;
198 return misc_read_error;
200 if (
id >= sitems.
length())
206 if (si->
f.
load(ts) != format_ok)
207 return misc_read_error;
208 idval = si->
f.
I(
"id",0);
212 return misc_read_error;
228 while (ts.
peek() !=
"End_of_Relations")
234 if (r->
load(ts,sitems) != format_ok)
235 return misc_read_error;
241 return misc_read_error;
250 EST_write_status EST_UtteranceFile::save_est_ascii(ostream &outf,
const EST_Utterance &utt)
252 EST_write_status v = write_ok;
255 outf.setf(ios::fixed, ios::floatfield);
258 outf <<
"EST_File utterance\n";
259 outf <<
"DataType ascii\n";
260 outf <<
"version 2\n";
261 outf <<
"EST_Header_End\n";
268 outf <<
"Stream_Items\n";
270 v = utt_save_all_contents(outf,utt,sinames);
271 if (v == write_fail)
return v;
272 outf <<
"End_of_Stream_Items\n";
275 outf <<
"Relations\n";
279 v = relation(p->v)->save(outf,sinames);
280 if (v == write_fail)
return v;
282 outf <<
"End_of_Relations\n";
284 outf <<
"End_of_Utterance\n";
288 static EST_write_status utt_save_all_contents(ostream &outf,
297 EST_write_status v = write_ok;
303 v = utt_save_all_contents(outf,relation(p->v)->head(),
305 if (v == write_fail)
return v;
311 static EST_write_status utt_save_all_contents(ostream &outf,
320 utt_save_ling_content(outf,n,sinames,si_count);
323 utt_save_all_contents(outf,n->next(),sinames,si_count);
324 utt_save_all_contents(outf,n->down(),sinames,si_count);
329 static EST_write_status utt_save_ling_content(ostream &outf,
336 if ((si != 0) && (!sinames.
present(si->contents())))
338 sinames.
add_item(si->contents(),si_count);
339 outf << si_count <<
" ";
340 si->features().
save(outf);
352 EST_read_status status = read_ok;
358 status = rel->
load(
"", ts,
"esps");
373 EST_write_status EST_UtteranceFile::save_xlabel(ostream &outf,
376 EST_write_status status = write_error;
384 rel = ::relation(p->v);
391 if (hd->up() || hd->down())
398 return rel->
save(outf,
"esps", 0);
406 #if defined(INCLUDE_XML_FORMATS)
421 long pos=ftell(stream);
426 fgets(buf, 80, stream);
428 if (strncmp(buf,
"<?xml", 5) != 0)
429 return read_format_error;
431 fgets(buf, 80, stream);
433 if (strncmp(buf,
"<!DOCTYPE apml", 14) != 0)
434 return read_format_error;
437 fseek(stream, pos, 0);
439 EST_read_status stat = apml_read(stream, ts.
filename(),u, max_id);
442 fseek(stream, pos, 0);
459 long pos=ftell(stream);
464 fgets(buf, 80, stream);
466 if (strncmp(buf,
"<?xml", 5) != 0)
467 return read_format_error;
470 fseek(stream, pos, 0);
472 EST_read_status stat = EST_GenXML::read_xml(stream, ts.
filename(),u, max_id);
475 fseek(stream, pos, 0);
480 EST_write_status EST_UtteranceFile::save_genxml(ostream &outf,
483 EST_write_status status=write_ok;
498 for (fp.
begin(hd->features()); fp; ++fp)
499 features.add_item(fp->k, 1);
504 outf <<
"<?xml version='1.0'?>\n";
506 outf <<
"<!DOCTYPE utterance PUBLIC '//CSTR EST//DTD cstrutt//EN' 'cstrutt.dtd'\n\t[\n";
510 outf <<
"\t<!ATTLIST item\n";
511 for (f.
begin(features); f; ++f)
515 outf <<
"\t\t" << f->k <<
"\tCDATA #IMPLIED\n";
523 outf <<
"<utterance>\n";
525 outf <<
"<language name='unknown'/>\n";
536 if (hd->up() || hd->down())
544 outf <<
"<relation name='"<< rel->
name()<<
"' structure-type='list'>\n";
552 for (p.
begin(hd->features()); p; ++p)
553 if (p->k !=
"estContentFeature")
554 outf <<
" " << p->k <<
"='" << p->v <<
"'\n";
561 outf <<
"</relation>\n";
564 status=write_partial;
568 outf <<
"</utterance>\n";
575 EST_String EST_UtteranceFile::options_short(
void)
579 for(
int n=0; n< EST_UtteranceFile::map.n() ; n++)
581 EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
582 if (type != uff_none)
584 for(
int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
586 const char *nm = EST_UtteranceFile::map.name(type, ni);
600 EST_String EST_UtteranceFile::options_supported(
void)
602 EST_String s(
"Available utterance file formats:\n");
604 for(
int n=0; n< EST_UtteranceFile::map.n() ; n++)
606 EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
607 if (type != uff_none)
609 const char *d = EST_UtteranceFile::map.info(type).description;
610 for(
int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
612 const char *nm = EST_UtteranceFile::map.name(type, ni);
626 Start_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
627 { uff_none, { NULL },
628 { FALSE, NULL, NULL,
"unknown utterance file type"} },
629 { uff_est, {
"est",
"est_ascii"},
630 { TRUE, EST_UtteranceFile::load_est_ascii, EST_UtteranceFile::save_est_ascii,
"Standard EST Utterance File" } },
631 #if defined(INCLUDE_XML_FORMATS)
632 { uff_apml, {
"apml",
"xml"},
633 { TRUE, EST_UtteranceFile::load_apml, NULL,
"Utterance in APML" } },
634 { uff_genxml, {
"genxml",
"xml"},
635 { TRUE, EST_UtteranceFile::load_genxml, EST_UtteranceFile::save_genxml,
"Utterance in XML, Any DTD" } },
637 { uff_xlabel, {
"xlabel"},
638 { TRUE, EST_UtteranceFile::load_xlabel, EST_UtteranceFile::save_xlabel,
"Xwaves Label File" } },
640 { FALSE, NULL, NULL,
"unknown utterance file type"} }
642 End_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
646 #if defined(INSTANTIATE_TEMPLATES)
647 #include "../base_class/EST_TNamedEnum.cc"
648 Instantiate_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
653 #if defined(INSTANTIATE_TEMPLATES)
655 #include "../base_class/EST_TSimpleVector.cc"
656 #include "../base_class/EST_TVector.cc"
657 #include "../base_class/EST_Tvectlist.cc"