Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
est_file.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1994,1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : March 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* File functions for EST type files */
37 /* */
38 /*=======================================================================*/
39 
40 #include "EST_FileType.h"
41 #include "EST_TNamedEnum.h"
42 #include "EST_Token.h"
43 #include "EST_Option.h"
44 #include "EST_Features.h"
45 
// Name table for EST file types: each entry pairs an EST_EstFileType value
// with the string name(s) accepted for it ("Track" and "track" both map to
// est_file_track).
47 estfile_names[] =
48 {
49  { est_file_none, { "None" }},
50  { est_file_track, { "Track", "track" }},
51  { est_file_wave, { "wave" }},
52  { est_file_label, { "label" }},
53  { est_file_utterance, { "utterance" }},
54  { est_file_fmatrix, { "fmatrix" }},
55  { est_file_fvector, { "fvector" }},
56  { est_file_dmatrix, { "dmatrix" }},
57  { est_file_dvector, { "dvector" }},
58  { est_file_feature_data, { "feature_data" }},
59  { est_file_fst, { "fst" }},
60  { est_file_ngram, { "ngram" }},
61  { est_file_index, { "index" }},
62  { est_file_f_catalogue, { "f_catalogue" }},
63  { est_file_unknown, { "unknown" }},
// NOTE(review): this repeats the first entry; presumably it is the
// end-of-table marker expected by EST_TNamedEnum -- confirm before removing.
64  { est_file_none, { "None" }},
65 };
66 
// Lookup object built from the table above.  read_est_header() calls
// EstFileEnums.token() to turn the type name found in a file header into
// an EST_EstFileType.
67 EST_TNamedEnum<EST_EstFileType> EstFileEnums(estfile_names);
68 
// When INSTANTIATE_TEMPLATES is defined, pull in the EST_TNamedEnum template
// implementation and explicitly instantiate it for EST_EstFileType in this
// translation unit.
69 #if defined(INSTANTIATE_TEMPLATES)
70 
71 #include "../base_class/EST_TNamedEnum.cc"
72 template class EST_TNamedEnum<EST_EstFileType>;
76 #endif
77 
78 /** Read and parse the header of an EST_File - interim version
79 returning features rather than EST_Option
80 */
81 
82 EST_read_status read_est_header(EST_TokenStream &ts, EST_Features &hinfo,
83  bool &ascii, EST_EstFileType &t)
84 {
85  EST_String k, v;
86  char magic_number[9];
87  int pos;
88 
89  // read initial file type identifier, can't use peek or get
90  // as that could read *way* too far if it's binary so just read
91  // the first n bytes to change the magic number
92  pos = ts.tell();
93  if ((ts.fread(magic_number,sizeof(char),8) != 8) ||
94  (strncmp(magic_number,"EST_File",8) != 0))
95  {
96  ts.seek(pos);
97  return wrong_format;
98  }
99 
100  v = ts.get().string();
101  t = EstFileEnums.token(v);
102 
103  if (t == est_file_none)
104  {
105  // Its not a standardly defined type but did have EST_File on
106  // it so accept it but set FileType in the header info
107  t = est_file_unknown;
108  hinfo.set("FileType", v);
109  }
110 
111  while ((!ts.eof()) && (ts.peek().string() != "EST_Header_End"))
112  { // note this *must* be done using temporary variables
113  k = ts.get().string();
114  v = ts.get_upto_eoln().string();
115 
116  if (v.contains(RXwhite, 0))
117  v = v.after(RXwhite);
118 
119  hinfo.set(k, v);
120  }
121 
122  if (ts.eof())
123  {
124  cerr << "Unexpected end of EST_File" << endl;
125  return misc_read_error;
126  }
127  ts.get().string(); // read control EST_Header_End
128 
129  // If it explicitly says binary it is, otherwise its ascii
130  if (hinfo.S("DataType") == "binary")
131  ascii = false;
132  else
133  ascii = true;
134 
135  return format_ok;
136 }
137 
138 EST_read_status read_est_header(EST_TokenStream &ts, EST_Option &hinfo,
139  bool &ascii, EST_EstFileType &t)
140 {
141  EST_String k, v;
142  char magic_number[9];
143  int pos;
144 
145  // read initial file type identifier, can't use peek or get
146  // as that could read *way* too far if it's binary so just read
147  // the first n bytes to change the magic number
148  pos = ts.tell();
149  if ((ts.fread(magic_number,sizeof(char),8) != 8) ||
150  (strncmp(magic_number,"EST_File",8) != 0))
151  {
152  ts.seek(pos);
153  return wrong_format;
154  }
155 
156  v = ts.get().string();
157  t = EstFileEnums.token(v);
158 
159  if (t == est_file_none)
160  {
161  // Its not a standardly defined type but did have EST_File on
162  // it so accept it but set FileType in the header info
163  t = est_file_unknown;
164  hinfo.add_item("FileType",v);
165  }
166 
167  while ((!ts.eof()) && (ts.peek().string() != "EST_Header_End"))
168  { // note this *must* be done using temporary variables
169  k = ts.get().string();
170  v = ts.get_upto_eoln().string();
171 
172  if (v.contains(RXwhite, 0))
173  v = v.after(RXwhite);
174 
175  hinfo.add_item(k, v);
176  }
177 
178  if (ts.eof())
179  {
180  cerr << "Unexpected end of EST_File" << endl;
181  return misc_read_error;
182  }
183  ts.get().string(); // read control EST_Header_End
184 
185  // If it explicitly says binary it is, otherwise its ascii
186  if (hinfo.sval("DataType",0) == "binary")
187  ascii = false;
188  else
189  ascii = true;
190 
191  return format_ok;
192 }