45 #include "EST_types.h"
46 #include "EST_Track.h"
47 #include "EST_track_aux.h"
48 #include "EST_TrackMap.h"
49 #include "EST_cutils.h"
50 #include "EST_Token.h"
51 #include "EST_TList.h"
52 #include "EST_string_aux.h"
53 #include "EST_walloc.h"
54 #include "EST_TrackFile.h"
55 #include "EST_FileType.h"
56 #include "EST_WaveFile.h"
57 #include "EST_wave_utils.h"
// Threshold below which a frame shift is treated as effectively zero; loaders
// in this file compare ishift against it and refuse to continue when smaller.
62 #define NEARLY_ZERO 0.00001
// load_htk reads frames of up to this many values into fixed-size local
// buffers; larger frames get a heap buffer instead.
64 #define REASONABLE_FRAME_SIZE (20)
// load_htk rejects files whose frames hold more values than this.
65 #define UNREASONABLE_FRAME_SIZE (80)
// Signature expected at the very start of a NIST header (checked in load_NIST
// and written first when a NIST header is built).
68 static const char *NIST_SIG =
"NIST_1A\n 1024\n";
// Marker appended after the header fields when a NIST header is built.
69 static const char *NIST_END_SIG =
"end_head\n";
// Size of the NIST header buffer, in chars, as read by load_NIST.
70 #define NIST_HDR_SIZE 1024
// Passed to nist_get_param_int as the default value for "sample_rate".
72 static int def_load_sample_rate = 500;
// Forward declarations of helpers defined outside this section of the file.
// They are used here by the NIST reader/writer code.
//
// Look up an integer field in a NIST header buffer.
//   hdr      header text
//   field    field name to look up
//   def_val  presumably returned when the field is absent -- TODO confirm
78 int nist_get_param_int(
char *hdr,
char *field,
int def_val);
// String counterpart of nist_get_param_int. NOTE(review): load_NIST passes the
// result to wfree(), so the returned string appears to be heap allocated.
79 char *nist_get_param_str(
char *hdr,
char *field,
char *def_val);
// Map an EST sample type to the name used in NIST "sample_coding" fields.
80 const char *sample_type_to_nist(
enum EST_sample_type_t sample_type);
// Map a NIST "sample_coding" name back to an EST sample type.
81 enum EST_sample_type_t nist_to_sample_type(
char *type);
84 bool &ascii, EST_EstFileType &t);
// Load an ESPS track file into `tr`.
//   filename        path handed to get_track_esps
//   tr              destination track; resized and filled here
//   ishift, startt  part of the common loader signature; their use is not
//                   visible in the lines shown here
// Returns misc_read_error when get_track_esps reports one. The handling of
// wrong_format and the success return are on lines outside this view.
86 EST_read_status EST_TrackFile::load_esps(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
94 int num_points, num_fields, num_values;
96 EST_read_status r_val;
// The reader receives pointers to the field names, the data table `tt` and
// the point/value counts, all of which are consumed below.
100 r_val = get_track_esps(filename, &fields, &tt, &fsize, &num_points,
101 &num_values, &fixed);
102 if (r_val == misc_read_error)
104 cerr <<
"Error reading ESPS file " << filename << endl;
105 return misc_read_error;
107 else if (r_val == wrong_format)
// Start from one track channel per value reported by the reader.
110 num_fields = num_values;
117 tr.
resize(num_points,num_fields);
// Copy the table into the track, offsetting each row by first_channel columns.
120 for (i = 0; i < num_points; ++i)
122 for (j = 0; j < num_fields; ++j)
123 tr.
a(i, j) = tt[i][j+first_channel];
129 for (i = 0; i < num_fields; ++i)
// ESPS tracks are flagged as equally spaced, without the single-break flag.
134 tr.set_single_break(
false);
135 tr.set_equal_space(
true);
138 for (i = 0; i < num_values; ++i)
141 for (i = 0; i < num_values; ++i)
145 tr.set_file_type(tff_esps);
// Tracks whose first channel is named "F0" get extra handling (not shown here).
148 if (tr.channel_name(0) ==
"F0")
// Load a plain ascii track: each row of the input becomes one frame and each
// number on the row one channel value.
//   filename  file to read; "-" selects cin
//   tr        destination track; resized to rows x columns and filled here
//   ishift    frame spacing; must not be below NEARLY_ZERO
//   startt    part of the common loader signature; use not visible here
// Returns misc_read_error if the stream cannot be opened, if ishift is too
// small, or if a row's value count does not match (see the messages below).
154 EST_read_status EST_TrackFile::load_ascii(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
161 int i, j, n_rows, n_cols=0;
165 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
167 cerr <<
"Can't open track file " << filename << endl;
168 return misc_read_error;
// The file carries no timing information of its own, so a usable frame shift
// has to be supplied by the caller.
173 if (ishift < NEARLY_ZERO)
176 "Error: Frame spacing must be specified (or apparent frame shift nearly zero)\n";
177 return misc_read_error;
// First pass: count rows from the file stream and columns from a row stream.
182 for (n_rows = 0; !ts.
eof(); ++n_rows)
188 for (n_cols = 0; !tt.
eof(); ++n_cols)
193 tr.
resize(n_rows, n_cols);
// Second pass: walk the stored rows in list `sl` and parse each value as a float.
195 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
199 for (j = 0; !tt.
eof(); ++j)
200 tr.
a(i, j) = tt.
get().Float(ok);
203 cerr <<
"Wrong number of points in row " << i << endl;
204 cerr <<
"Expected " << n_cols <<
" got " << j << endl;
205 return misc_read_error;
210 tr.set_single_break(FALSE);
211 tr.set_equal_space(TRUE);
212 tr.set_file_type(tff_ascii);
// Load an xgraph-style text file: the first number of every row is stored as
// the frame time, the remaining numbers as channel values.
//   filename        file to read; "-" selects cin
//   tr              destination track; resized and filled here
//   ishift, startt  part of the common loader signature; use not visible here
// Returns misc_read_error if the stream cannot be opened or a row's value
// count does not match (see the messages below).
218 EST_read_status EST_TrackFile::load_xgraph(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
226 int i, j, n_rows, n_cols;
230 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
232 cerr <<
"Can't open track file " << filename << endl;
233 return misc_read_error;
// First pass: count rows from the file stream and columns from a row stream.
240 for (n_rows = 0; !ts.
eof(); ++n_rows)
244 for (n_cols = 0; !tt.
eof(); ++n_cols)
250 tr.
resize(n_rows, n_cols);
// Second pass: walk the stored rows in list `sl`.
252 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
// Leading value of the row is the time stamp of frame i.
256 tr.
t(i) = tt.
get().Float(ok);
257 for (j = 0; !tt.
eof(); ++j)
258 tr.
a(i, j) = tt.
get().Float(ok);
261 cerr <<
"Wrong number of points in row " << i << endl;
262 cerr <<
"Expected " << n_cols <<
" got " << j << endl;
263 return misc_read_error;
267 tr.set_single_break(FALSE);
// NOTE(review): per-row times were stored in t(i) above, yet the track is
// flagged as equally spaced -- confirm this is intended.
268 tr.set_equal_space(TRUE);
269 tr.set_file_type(tff_xgraph);
// Load an xmg file: a text header introduced by the token "XAO1", terminated
// by a form feed, followed by time/value rows.
//   filename        file to read; "-" selects cin
//   tr              destination track; filled with one time and one value per row
//   ishift, startt  part of the common loader signature; use not visible here
// Returns misc_read_error if the stream cannot be opened or the header ends
// prematurely.
275 EST_read_status EST_TrackFile::load_xmg(
const EST_String filename,
EST_Track &tr,
float ishift,
float startt)
286 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
288 cerr <<
"Can't open track file " << filename << endl;
289 return misc_read_error;
// Files that do not start with the xmg identifier are handled on a line not
// shown here.
294 if (ts.
peek().string() !=
"XAO1")
// Header: key/value token pairs up to the form-feed character ("\014").
299 while ((!ts.
eof()) && (ts.
peek().string() !=
"\014"))
301 k = ts.
get().string();
302 v = ts.
get().string();
305 else if (k ==
"YMin")
307 else if (k ==
"YMax")
313 cerr <<
"Unexpected end of file in reading xmg header\n";
314 return misc_read_error;
// Data section: count the rows, then walk the stored rows in list `sl`.
320 for (n = 0; !ts.
eof(); ++n)
326 for (p = sl.head(), i = 0; p != 0; ++i, p = p->next())
// Rows that start with "=" are treated separately (handling not shown here).
330 if (ts.
peek().string() !=
"=")
// File times are divided by 1000 before being stored (presumably ms -> s).
332 tr.
t(i) = ts.
get().Float(ok) / 1000.0;
333 tr.
a(i) = ts.
get().Float(ok);
// xmg tracks are flagged as single-break and not equally spaced.
342 tr.set_single_break(TRUE);
343 tr.set_equal_space(FALSE);
344 tr.set_file_type(tff_xmg);
// Load an EST track file by opening a token stream on it and delegating the
// parsing to load_est_ts.
//   filename        file to read; "-" selects cin
//   tr              destination track
//   ishift, startt  forwarded unchanged to load_est_ts
// Returns misc_read_error if the stream cannot be opened, or if parsing
// succeeded but input remains after the track data.
350 EST_read_status EST_TrackFile::load_est(
const EST_String filename,
351 EST_Track &tr,
float ishift,
float startt)
356 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
358 cerr <<
"Can't open track file " << filename << endl;
359 return misc_read_error;
364 r = load_est_ts(ts, tr, ishift, startt);
// A stand-alone file must contain exactly one track: trailing data is an error.
366 if ((r == format_ok) && (!ts.
eof()))
368 cerr <<
"Not end of file, but expected it\n";
369 return misc_read_error;
379 if (swap) swapfloat(&f);
384 EST_Track &tr,
float ishift,
float startt)
389 int num_frames, num_channels, num_aux_channels;
399 if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
401 if (t != est_file_track)
402 return misc_read_error;
404 breaks = hinfo.
present(
"BreaksPresent") ?
true :
false;
406 if ((hinfo.
present(
"EqualSpace")) &&
407 ((hinfo.
S(
"EqualSpace") ==
"true") ||
408 (hinfo.
S(
"EqualSpace") ==
"1")))
411 num_frames = hinfo.
I(
"NumFrames");
412 num_channels = hinfo.
I(
"NumChannels");
413 num_aux_channels = hinfo.
I(
"NumAuxChannels", 0);
414 tr.
resize(num_frames, num_channels);
416 hinfo.
remove(
"NumFrames");
417 hinfo.
remove(
"EqualSpace");
418 hinfo.
remove(
"NumChannels");
419 hinfo.
remove(
"BreaksPresent");
421 if (hinfo.
present(
"NumAuxChannels"))
422 hinfo.
remove(
"NumAuxChannels");
429 for (p.
begin(hinfo); p;)
433 if (c->k.contains(
"Aux_Channel_"))
435 ch_map.
append(c->v.String());
438 else if (c->k.contains(
"Channel_"))
441 c->k.after(
"Channel_").Int());
452 if (!hinfo.
present(
"ByteOrder"))
454 else if (((hinfo.
S(
"ByteOrder") ==
"01") ? bo_little : bo_big)
460 const int BINARY_CHANNEL_BUFFER_SIZE=1024;
462 float frame_buffer[BINARY_CHANNEL_BUFFER_SIZE];
465 if( num_channels > BINARY_CHANNEL_BUFFER_SIZE )
466 frame =
new float[num_channels];
468 frame = frame_buffer;
472 for (i = 0; i < num_frames; ++i)
481 cerr <<
"unexpected end of file when looking for " << num_frames-i <<
" more frame(s)" << endl;
482 return misc_read_error;
484 tr.
t(i) = ts.
get().Float(ok);
486 return misc_read_error;
489 tr.
t(i) = get_float(ts,swap);
496 v = ts.
get().string();
504 if (get_float(ts,swap) == 0.0)
527 for (j = 0; j < num_channels; ++j){
528 tr.
a(i, j) = ts.
get().Float(ok);
530 return misc_read_error;
534 ts.
fread( frame,
sizeof(
float), num_channels );
536 for( j=0; j<num_channels; ++j ){
537 swapfloat( &frame[j] );
538 tr.
a(i,j) = frame[j];
541 for( j=0; j<num_channels; ++j )
542 tr.
a(i,j) = frame[j];
551 tr.aux(i, j) = ts.
get().string();
553 return misc_read_error;
557 cerr <<
"Warning: Aux Channel reading not yet implemented";
558 cerr <<
"for binary tracks\n";
564 if( frame != frame_buffer )
570 tr.set_single_break(FALSE);
571 tr.set_equal_space(eq_space);
574 tr.set_file_type(tff_est_ascii);
576 tr.set_file_type(tff_est_binary);
582 float ishift,
float startt)
591 if (ishift < NEARLY_ZERO)
594 "Error: Frame spacing must be specified (or apparent frame shift nearly zero)\n";
595 return misc_read_error;
598 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
600 cerr <<
"Can't open track file " << filename << endl;
601 return misc_read_error;
604 if (ts.
get().string() !=
"SNNS")
606 if (ts.
get().string() !=
"result")
612 int num_frames=0, num_channels=0;
619 if (t.
contains(
"teaching output included"))
626 if (k ==
"No. of output units")
627 num_channels = v.Int();
628 if (k ==
"No. of patterns")
629 num_frames = v.Int();
637 tr.
resize(num_frames, num_channels);
641 for (i = 0; (!ts.
eof()) && (i < num_frames);)
650 for (j = 0; j < num_channels; ++j)
657 for (j = 0; j < num_channels; ++j)
658 tr.
a(i, j) = ts.
get().Float(ok);
664 tr.set_single_break(FALSE);
665 tr.set_equal_space(TRUE);
666 tr.set_file_type(tff_snns);
678 int extra_channels=0;
684 cerr <<
"Output to stdout not available for ESPS file types:";
685 cerr <<
"no output written\n";
689 if ((include_time = (track_tosave.
equal_space() != TRUE)))
691 shift = EST_Track::default_frame_shift;
695 shift = track_tosave.
shift();
699 float **a =
new float*[track_tosave.
num_frames()];
703 a[i] =
new float[track_tosave.
num_channels() + extra_channels];
706 a[i][0] = track_tosave.
t(i);
709 a[i][j + extra_channels] = track_tosave.
a(i,j);
712 char **f_names =
new char*[track_tosave.
num_channels() + extra_channels];
716 f_names[i + extra_channels] = wstrdup(track_tosave.channel_name(i, esps_channel_names, 0));
720 f_names[0] = wstrdup(
"EST_TIME");
722 rc = put_track_esps(filename, f_names,
728 for (i=0; i < track_tosave.
num_frames(); i ++)
731 for (i=0; i < track_tosave.
num_channels()+extra_channels; i++)
732 delete [] f_names[i];
// Write `tr` to an already-open stream in the ascii EST track format: a
// text header followed by one text row per frame.
//   fp  destination stream (opened by the caller)
//   tr  track to write; note it is taken by value
// The return value is produced on lines outside this view.
738 EST_write_status EST_TrackFile::save_est_ts(FILE *fp,
EST_Track tr)
// Header: file identification and basic dimensions.
742 fprintf(fp,
"EST_File Track\n");
743 fprintf(fp,
"DataType ascii\n");
744 fprintf(fp,
"NumFrames %d\n", tr.
num_frames());
749 fprintf(fp,
"BreaksPresent true\n");
// One header line per channel / aux channel giving its index and name.
751 fprintf(fp,
"Channel_%d %s\n", i, (
const char *)(tr.channel_name(i)));
754 fprintf(fp,
"Aux_Channel_%d %s\n", i,
755 (
const char *)(tr.aux_channel_name(i)));
// Remaining key/value pairs obtained by iterating over the track.
759 for (p.
begin(tr); p; ++p)
760 fprintf(fp,
"%s %s\n", (
const char *)p->k,
761 (
const char *) p->v.String());
763 fprintf(fp,
"EST_Header_End\n");
// Per-frame row: time, then "1 " or "0 " depending on tr.val(i), then values.
767 fprintf(fp,
"%f\t", tr.
t(i));
768 fprintf(fp,
"%s\t", (
char *)(tr.
val(i) ?
"1 " :
"0 "));
772 fprintf(fp,
"%s ", (
const char *)tr.aux(i, j).
string());
// Save a track to `filename` in the ascii EST format: open the file and let
// save_est_ts do the writing. The remaining parameters and the handling of
// the other branch of the open test are on lines outside this view.
778 EST_write_status EST_TrackFile::save_est_ascii(
const EST_String filename,
// Opened in binary mode ("wb").
786 else if ((fd = fopen(filename,
"wb")) == NULL)
789 r = save_est_ts(fd,tr);
803 else if ((fd = fopen(filename,
"wb")) == NULL)
806 r = save_est_binary_ts(fd,tr);
// Write `tr` to an already-open stream in the binary EST track format: a
// text header followed by raw float data.
//   fp  destination stream (opened by the caller)
//   tr  track to write; note it is taken by value
// Returns misc_write_error as soon as any fwrite of frame data fails; the
// success return is on a line outside this view.
814 EST_write_status EST_TrackFile::save_est_binary_ts(FILE *fp,
EST_Track tr)
821 fprintf(fp,
"EST_File Track\n");
822 fprintf(fp,
"DataType binary\n");
// Record the writer's byte order: "10" when native order is big-endian,
// "01" otherwise.
823 fprintf(fp,
"ByteOrder %s\n", ((EST_NATIVE_BO == bo_big) ?
"10" :
"01"));
824 fprintf(fp,
"NumFrames %d\n", tr.
num_frames());
828 fprintf(fp,
"BreaksPresent true\n");
829 fprintf(fp,
"CommentChar ;\n\n");
831 fprintf(fp,
"Channel_%d %s\n",i,tr.channel_name(i).
str());
832 fprintf(fp,
"EST_Header_End\n");
// Frame data: every item is written as a single 4-byte element.
837 if((
int)fwrite(&tr.
t(i),4,1,fp) != 1)
838 return misc_write_error;
// tr.val(i) is stored as a float holding 1 or 0.
843 float bm = (tr.
val(i) ? 1 : 0);
844 if((
int)fwrite(&bm,4,1,fp) != 1)
845 return misc_write_error;
849 if((
int)fwrite(&tr.
a_no_check(i, j),4,1,fp) != 1)
850 return misc_write_error;
866 outf =
new ofstream(filename);
872 outf->setf(ios::fixed, ios::floatfield);
878 *outf << tr.
a(i, j) <<
" ";
896 outf =
new ofstream(filename);
905 *outf <<
"\""<< tr.channel_name(j) <<
"\"\n";
908 *outf << tr.
t(i) <<
"\t" << tr.
a(i, j) << endl;
// Write paired input/output track lists as an SNNS pattern definition file.
//   filename  destination file
//   inpat     list of input tracks  (one pattern per frame)
//   outpat    list of output tracks, walked in step with inpat
// Returns misc_write_error when an input track and its matching output track
// have different frame counts.
// NOTE(review): the loop below advances `po` alongside `pi` without checking
// it, so outpat is assumed to hold at least as many tracks as inpat.
918 EST_write_status save_snns_pat(
const EST_String filename,
922 int num_inputs, num_outputs, num_pats, i;
928 outf =
new ofstream(filename);
// Total pattern count = sum of frames over all input tracks. The counter is
// zeroed here so the accumulation never starts from an indeterminate value.
934 for (num_pats = 0, pi = inpat.head(); pi ; pi = pi->next())
935 num_pats += inpat(pi).num_frames();
937 *outf <<
"SNNS pattern definition file V3.2\n";
939 time_t thetime = time(0);
940 char *date = ctime(&thetime);
// Unit counts are taken from the first track of each list.
945 num_inputs = inpat.
first().num_channels();
946 num_outputs = outpat.
first().num_channels();
948 *outf <<
"No. of patterns : " << num_pats << endl;
949 *outf <<
"No. of input units : "<< num_inputs << endl;
950 *outf <<
"No. of output units : "<< num_outputs << endl;
951 *outf << endl << endl;
953 for (pi = inpat.head(), po = outpat.head(); pi ;
954 pi = pi->next(), po = po->next())
// Compare the input track with its partner in outpat. The output list must be
// indexed with its own iterator `po`, as the writing loop below does.
956 if (inpat(pi).num_frames() != outpat(po).num_frames())
958 cerr <<
"Error: Input pattern has " << inpat(pi).num_frames()
959 <<
" output pattern has " << outpat(po).num_frames() << endl;
962 return misc_write_error;
// One "#Input pattern" / "#Output pattern" pair per frame, numbered from 1.
964 for (i = 0; i < inpat(pi).num_frames(); ++i)
967 *outf <<
"#Input pattern " << (i + 1) <<
":\n";
968 for (j = 0; j < inpat(pi).num_channels(); ++j)
969 *outf << inpat(pi).a(i, j) <<
" ";
971 *outf <<
"#Output pattern " << (i + 1) <<
":\n";
972 for (j = 0; j < outpat(po).num_channels(); ++j)
973 *outf << outpat(po).a(i, j) <<
" ";
1043 if (filename ==
"-")
1046 outf =
new ofstream(filename);
1052 outf->setf(ios::fixed, ios::floatfield);
1062 *outf <<
"XAO1\n\n";
1063 *outf <<
"LineType segments \n";
1064 *outf <<
"LineStyle solid \n";
1065 *outf <<
"LineWidth 0 \n";
1066 *outf <<
"Freq " << sr / 1000 << endl;
1067 *outf <<
"Format Binary \n";
1073 *outf << char(12) <<
"\n";
1080 *outf << tr.
ms_t(i) <<
"\t";
1082 *outf <<tr.
a(i, j) <<
" ";
// Write a track as an HTK parameter file of the requested kind.
//   filename  destination file; "-" is special-cased (handling not shown)
// The remaining parameters are declared on lines outside this view; the code
// below refers to them as `orig` (the track to save) and `use_type` (the HTK
// parameter kind).
// Returns misc_write_error if the output file cannot be opened.
1093 static EST_write_status save_htk_as(
const EST_String filename,
// Tracks marked as LPC contours are converted by track_to_htk_lpc, which also
// supplies the HTK type code.
1109 if (orig.f_String(
"contour_type",
"none") ==
"ct_lpc")
1110 type = track_to_htk_lpc(orig, track);
// Sample period in HTK time units, rounded to a multiple of 10. This variant
// uses the default frame shift and adds one channel to the file.
1120 s = rint((HTK_UNITS_PER_SECOND * EST_Track::default_frame_shift/1000.0)/10.0) * 10.0;
1122 file_num_channels += 1;
// Same rounding, using the track's own shift.
1127 s = rint((HTK_UNITS_PER_SECOND * track.
shift())/10.0) * 10.0;
// Header fields are stored as-is when EST_BIG_ENDIAN is set and byte-swapped
// otherwise.
1137 header.num_samps = (EST_BIG_ENDIAN ? track.
num_frames()
1141 header.samp_period = (EST_BIG_ENDIAN ? (long) s : SWAPINT((
long) s));
// Discrete files store one short per frame; other kinds store
// file_num_channels floats per frame.
1142 if(use_type == HTK_DISCRETE)
1143 header.samp_size = (EST_BIG_ENDIAN ? sizeof(
short) :
1144 SWAPSHORT(sizeof(
short)));
1146 header.samp_size = (EST_BIG_ENDIAN ? (sizeof(
float) * file_num_channels) :
1147 SWAPSHORT((sizeof(
float) * file_num_channels)));
1149 header.samp_type = EST_BIG_ENDIAN ? type : SWAPSHORT(type);
1153 if (filename == "-")
1155 else if ((outf = fopen(filename,"wb")) == NULL)
1157 cerr <<
"save_htk: cannot open file \"" << filename <<
1158 "\" for writing." << endl;
1159 return misc_write_error;
// The four header fields are written one by one rather than as a struct.
1163 fwrite((
char*)&(header.num_samps), 1,
sizeof(header.num_samps), outf);
1164 fwrite((
char*)&(header.samp_period), 1,
sizeof(header.samp_period), outf);
1165 fwrite((
char*)&(header.samp_size), 1,
sizeof(header.samp_size), outf);
1166 fwrite((
char*)&(header.samp_type), 1,
sizeof(header.samp_type), outf);
// Discrete output: only channel 0 is written, cast to short.
1169 if(use_type == HTK_DISCRETE)
1173 cerr <<
"No data to write as HTK_DISCRETE !" << endl;
1179 cerr <<
"Warning: multiple channel track being written" << endl;
1180 cerr <<
" as discrete will only save channel 0 !" << endl;
1184 short tempshort = (EST_BIG_ENDIAN ? (short)(track.
a(i, 0)) :
1185 SWAPSHORT((
short)(track.
a(i, 0)))) ;
1186 fwrite((
unsigned char*) &tempshort, 1,
sizeof(
short), outf);
// Float output: when the type carries HTK_EST_PS the frame time is written too.
// NOTE(review): swapfloat is applied to the track's own storage, so `track`
// holds byte-swapped values afterwards.
1193 if ((type & HTK_EST_PS) != 0)
1196 swapfloat(&(track.
t(i)));
1197 fwrite((
unsigned char*) &(track.
t(i)), 1,
sizeof(
float), outf);
1202 swapfloat(&(track.
a(i,j)));
1203 fwrite((
unsigned char*) &(track.
a(i, j)), 1,
sizeof(
float), outf);
1215 return htk->num_samps > 0 &&
1216 htk->samp_period > 0 &&
1217 htk->samp_size > 0 &&
1218 htk->samp_size < (short)(UNREASONABLE_FRAME_SIZE *
sizeof(
float));
// Decide whether an HTK header needs byte swapping. The header is first tested
// with htk_sane_header as read; all four fields are then byte-swapped in place
// and tested again. The values returned for each outcome are on lines outside
// this view.
//   header  header to examine; may be modified (swapped) by this call
1221 static int htk_swapped_header(
htk_header *header)
1228 if (htk_sane_header(header))
// Swap every field and retry the sanity check.
1231 header->num_samps = SWAPINT(header->num_samps);
1232 header->samp_period = SWAPINT(header->samp_period);
1233 header->samp_size = SWAPSHORT(header->samp_size);
1234 header->samp_type = SWAPSHORT(header->samp_type);
1236 if (htk_sane_header(header))
1245 return save_htk_as(filename, tmp, HTK_FBANK);
1250 return save_htk_as(filename, tmp, HTK_FBANK);
1255 return save_htk_as(filename, tmp, HTK_MFCC);
1260 return save_htk_as(filename, tmp, HTK_MFCC | HTK_ENERGY);
1265 return save_htk_as(filename, tmp, HTK_USER);
// Save `tmp` as an HTK file of kind HTK_DISCRETE; thin wrapper around
// save_htk_as, whose status is returned unchanged.
1268 EST_write_status EST_TrackFile::save_htk_discrete(
const EST_String filename,
EST_Track tmp)
1270 return save_htk_as(filename, tmp, HTK_DISCRETE);
// Shared implementation behind load_ema and load_ema_swapped: read a
// headerless file of fixed-width samples into a track.
//   filename        file to read
//   tmp             destination track; resized to nframes x new_order
//   ishift, startt  part of the common loader signature; use not visible here
//   swap            selects byte swapping of the raw samples (the test itself
//                   is on a line outside this view)
// Returns misc_read_error if the file cannot be opened or fewer samples than
// expected are read.
1274 static EST_read_status load_ema_internal(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt,
bool swap)
1279 int i, j, k, nframes, new_order;
1281 int sample_width, data_length;
1285 if ((fp = fopen(filename,
"rb")) == NULL)
1287 cerr <<
"EST_Track load: couldn't open EST_Track input file" << endl;
1288 return misc_read_error;
// The sample count is derived from the file size: seek to the end and divide
// the offset by the sample width.
1291 fseek(fp, 0, SEEK_END);
1293 data_length = ftell(fp)/sample_width;
1295 nframes = data_length /new_order;
// NOTE(review): diagnostic output goes to cout on every load.
1298 cout <<
"d length: " << data_length <<
" nfr " << nframes << endl;
1300 tmp.
resize(nframes, new_order);
1302 tmp.set_equal_space(TRUE);
1304 file_data.
resize(data_length);
// Rewind and read the whole file in one call.
1306 fseek(fp, 0, SEEK_SET);
1308 if ((
int)fread(file_data.
memory(), sample_width, data_length, fp) != data_length)
1311 return misc_read_error;
1315 swap_bytes_short(file_data.
memory(), data_length);
// k runs over the flat sample buffer while (i, j) index frame and channel.
1317 for (i = k = 0; i < nframes; ++i)
1318 for (j = 0; j < new_order; ++j, ++k)
1336 tmp.set_file_type(tff_ema);
// Load an EMA file without byte swapping; forwards to load_ema_internal with
// swap set to FALSE and returns its status.
1342 EST_read_status EST_TrackFile::load_ema(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1344 return load_ema_internal(filename, tmp, ishift, startt, FALSE);
// Load an EMA file with byte swapping; forwards to load_ema_internal with
// swap set to TRUE and returns its status.
1348 EST_read_status EST_TrackFile::load_ema_swapped(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1350 return load_ema_internal(filename, tmp, ishift, startt, TRUE);
// Load a NIST-headered sample file into a track, one track channel per file
// channel.
//   filename        file to read; "-" selects cin
//   tmp             destination track; resized to samples x channels
//   ishift, startt  part of the common loader signature; use not visible here
// Returns misc_read_error if the stream cannot be opened, the header cannot be
// read or no sample data can be read, and wrong_format if the header does not
// start with NIST_SIG.
1354 EST_read_status EST_TrackFile::load_NIST(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1359 char header[NIST_HDR_SIZE];
1360 int samps,sample_width,data_length,actual_bo;
1361 unsigned char *file_data;
1362 enum EST_sample_type_t actual_sample_type;
1363 char *byte_order, *sample_coding;
1369 if (((filename ==
"-") ? ts.
open(cin) : ts.
open(filename)) != 0)
1371 cerr <<
"Can't open track file " << filename << endl;
1372 return misc_read_error;
// Remember where the header starts so the data offset can be computed later.
1375 current_pos = ts.
tell();
1376 if (ts.
fread(header,NIST_HDR_SIZE,1) != 1)
1377 return misc_read_error;
// Compare the whole signature. NIST_SIG is a pointer, so sizeof(NIST_SIG)
// would give the pointer size rather than the signature length.
1379 if (strncmp(header,NIST_SIG,
strlen(NIST_SIG)) != 0)
1380 return wrong_format;
// Pull the fields needed to interpret the data; the last argument of each
// call is the default handed to the helper.
1382 samps = nist_get_param_int(header,
"sample_count",-1);
1383 int num_channels = nist_get_param_int(header,
"channel_count",1);
1384 sample_width = nist_get_param_int(header,
"sample_n_bytes",2);
1386 nist_get_param_int(header,
"sample_rate",def_load_sample_rate);
1387 byte_order = nist_get_param_str(header,
"sample_byte_format",
1388 (EST_BIG_ENDIAN ?
"10" :
"01"));
1389 sample_coding = nist_get_param_str(header,
"sample_coding",
"pcm");
// Total number of samples to read across all channels, and a raw buffer for them.
1391 data_length = (samps - offset)*num_channels;
1392 file_data = walloc(
unsigned char,sample_width * data_length);
// Skip the header plus `offset` frames of samples.
1394 ts.
seek(current_pos+NIST_HDR_SIZE+(sample_width*offset*(num_channels)));
1396 n = ts.
fread(file_data,sample_width,data_length);
// Nothing usable was read.
1398 if ((n < 1) && (n != data_length))
1401 wfree(sample_coding);
1403 return misc_read_error;
// Exactly one channel's worth was read: the header's sample count presumably
// already covered all channels.
1405 else if ((n < data_length) && (data_length/num_channels == n))
1407 fprintf(stderr,
"TRACK read: nist header is (probably) non-standard\n");
1408 fprintf(stderr,
"TRACK read: assuming different num_channel interpretation\n");
// Any other short read is only reported here.
1411 else if (n < data_length)
1413 fprintf(stderr,
"TRACK read: short file %s\n",
1415 fprintf(stderr,
"WAVE read: at %d got %d instead of %d samples\n",
1416 offset,n,data_length);
// Convert the raw bytes according to the coding and byte order from the header.
1420 actual_sample_type = nist_to_sample_type(sample_coding);
1421 actual_bo = ((strcmp(byte_order,
"10") == 0) ? bo_big : bo_little);
1424 data = convert_raw_data(file_data,data_length,
1425 actual_sample_type,actual_bo);
1428 int num_samples = data_length/num_channels;
1429 tmp.
resize(num_samples, num_channels);
1430 tmp.set_equal_space(TRUE);
1433 cerr <<
"shift " << 1/(float)sample_rate << endl;
// Samples are interleaved by channel: k walks the flat buffer frame by frame.
1436 for (i=0; i<num_samples; i++)
1438 for (j = 0; j < num_channels; ++j)
1439 tmp.
a(i, j) = data[k++];
1442 for (j = 0; j < num_channels; ++j)
1462 if (filename ==
"-")
1464 else if ((fd = fopen(filename,
"wb")) == NULL)
1468 char header[NIST_HDR_SIZE], p[1024];;
1471 memset(header,0,1024);
1472 strcat(header, NIST_SIG);
1473 sprintf(p,
"channel_count -i %d\n", tr.
num_channels());
1475 sprintf(p,
"sample_count -i %d\n", tr.
num_frames());
1477 int sr = (int)(rint(1/(
float)tr.
shift()));
1478 sprintf(p,
"sample_rate -i %d\n", sr);
1480 t = sample_type_to_nist(st_short);
1481 sprintf(p,
"sample_coding -s%d %s\n", (
signed)strlen(t), t);
1484 strcat(header, NIST_END_SIG);
1486 strcat(header,
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n");
1489 if (fwrite(&header, 1024, 1, fd) != 1)
1490 return misc_write_error;
1502 int bo = str_to_bo(
"native");
// Load an HTK parameter file (plain float or compressed short data) into a
// track.
//   filename  file to read
//   tmp       destination track; resized to new_frames x num_channels
//   ishift    part of the common loader signature; use not visible here
//   startt    start time; only its range check is visible here
// Returns misc_read_error when the file cannot be opened, holds WAVEFORM or
// DISCRETE data, or frame data cannot be read; wrong_format /
// read_format_error when the header or file size is inconsistent (see the
// individual checks below).
1514 EST_read_status EST_TrackFile::load_htk(
const EST_String filename,
EST_Track &tmp,
float ishift,
float startt)
1523 int i,j, order, new_frames, num_values, num_channels;
1531 int header_sz =
sizeof(header);
// Decompression vectors: local buffers serve small frames, heap otherwise.
1534 float *compressA=NULL, compressA_Buffer[REASONABLE_FRAME_SIZE];
1535 float *compressB=NULL, compressB_Buffer[REASONABLE_FRAME_SIZE];
1536 bool fileIsCompressed=
false;
1538 unsigned short samp_type, base_samp_type;
1540 if ((fp = fopen(filename,
"rb")) == NULL){
1541 cerr <<
"EST_Track load: couldn't open EST_Track input file" << endl;
1542 return misc_read_error;
1546 if (fread(&header, header_sz, 1, fp) != 1){
1548 return wrong_format;
// Byte order is detected from the header itself; htk_swapped_header may
// swap the header fields in place.
1551 swap = htk_swapped_header(&header);
1555 return read_format_error;
// Split the type word into the base parameter kind and its qualifier bits.
1558 samp_type = header.samp_type;
1559 base_samp_type = samp_type & HTK_MASK;
// The HTK_EST_PS bit marks files that store a time value in each frame.
1561 time_included = (samp_type & HTK_EST_PS) != 0;
1563 switch(base_samp_type){
1565 cerr <<
"Can't read HTK WAVEFORM format file into track" << endl;
1566 return misc_read_error;
1575 EST_warning(
"reading HTK_IREFC and HTK_LPREC parameter types is unsupported" );
1577 return read_format_error;
1581 pname =
"ct_cepstrum";
// This kind is re-labelled as LPCCEP with deltas.
1586 base_samp_type = HTK_LPCCEP;
1587 samp_type = HTK_LPCCEP | HTK_DELTA;
1588 pname =
"ct_cepstrum";
1601 cerr <<
"Can't read HTK DISCRETE format file into track" << endl;
1602 return misc_read_error;
1611 return wrong_format;
// Compressed files store each value as a short and begin with two float
// vectors (read into compressA and compressB) used to rescale the data.
1618 if( header.samp_type & HTK_COMP ){
1620 fileIsCompressed =
true;
1622 num_channels = num_values = header.samp_size /
sizeof(
short int);
1625 if (num_channels > REASONABLE_FRAME_SIZE){
1626 compressA =
new float[num_values];
1627 compressB =
new float[num_values];
1630 compressA = compressA_Buffer;
1631 compressB = compressB_Buffer;
1634 if( (fread( compressA,
sizeof(
float), num_values, fp )) !=
static_cast<size_t>(num_values) ){
1636 return read_format_error;
1639 if( (fread( compressB,
sizeof(
float), num_values, fp )) != static_cast<size_t>(num_values) ){
1641 return read_format_error;
1645 swap_bytes_float( compressA, num_values );
1646 swap_bytes_float( compressB, num_values );
// The frame count is the header's sample count minus
// 2*(sizeof(float)-sizeof(short int)); presumably this discounts the space
// taken by the two vectors just read -- TODO confirm.
1651 new_frames = header.num_samps - (2*(
sizeof(float)-
sizeof(
short int)));
// Uncompressed files: one float per value, the frame count straight from the header.
1654 num_channels = num_values = header.samp_size /
sizeof(float);
1655 new_frames = header.num_samps;
1658 if (num_values > UNREASONABLE_FRAME_SIZE){
1660 return read_format_error;
// Frame shift derived from the header's sample period.
1666 float shift = ((float)header.samp_period/ (
float)HTK_UNITS_PER_SECOND);
1668 tmp.
resize(new_frames, num_channels);
1670 if ((startt > 0) && (startt < NEARLY_ZERO ))
1671 EST_warning(
"setting htk file start to %f", startt );
// Tracks that carry their own times are not flagged as equally spaced.
1675 tmp.set_equal_space(!time_included);
// Sanity check: the amount of data after the header must match the frame
// and value counts taken from the header.
1678 long dataBeginPosition = ftell(fp);
1679 if( dataBeginPosition == -1 ){
1681 return wrong_format;
1684 if (fseek(fp,0,SEEK_END)){
1686 return wrong_format;
1690 if ((file_length = ftell(fp)) == -1){
1692 return wrong_format;
1696 if( fileIsCompressed ){
1697 expected_vals = (file_length-dataBeginPosition) /
sizeof(
short int);
1699 if( header.samp_type & HTK_CRC )
1703 expected_vals = (file_length-dataBeginPosition) /
sizeof(
float);
1710 if( expected_vals != (num_values * new_frames) ){
1713 return wrong_format;
// `order` starts at the channel count and is adjusted by the qualifier bits
// below (the adjustments themselves are on lines not shown here).
1718 order = num_channels;
1719 if( samp_type & HTK_NO_E )
1722 if( samp_type & HTK_AC )
1724 else if( samp_type & HTK_DELTA )
1727 if( samp_type & HTK_ENERGY )
1731 if( fseek(fp, dataBeginPosition, SEEK_SET) == -1 ){
1732 cerr <<
"Couldn't position htk file at start of data" << endl;
1734 return misc_read_error;
// Compressed data: read each frame as shorts and rescale every value as
// (value + B) / A with the vectors read earlier.
1737 if( fileIsCompressed ){
1738 short int *frame, frame_buffer[REASONABLE_FRAME_SIZE];
1739 if( num_values > REASONABLE_FRAME_SIZE )
1740 frame =
new short int[num_values];
1742 frame = frame_buffer;
// When a time value is stored it occupies slot 0, so channels start at slot 1.
1744 int first_channel = time_included?1:0;
1746 for( i=0; i<new_frames; i++ ){
1747 if( fread( frame,
sizeof(
short int), num_values, fp ) != (
size_t) num_values ){
1748 cerr <<
"Could not read data from htk track file" << endl;
// Release any heap buffers before bailing out.
1751 if( frame != frame_buffer )
1753 if( compressA != compressA_Buffer )
1754 delete [] compressA;
1755 if( compressB != compressB_Buffer )
1756 delete [] compressB;
1758 return misc_read_error;
1762 swap_bytes_short( frame, num_values );
1765 tmp.
t(i) = ((float)frame[0]+compressB[0])/compressA[0];
1767 for( j=0; j<num_channels; ++j ){
1768 int index = j+first_channel;
1769 tmp.
a(i,j) = ((float)frame[index]+compressB[index])/compressA[index];
1775 if( frame != frame_buffer )
1777 if( compressA != compressA_Buffer )
1778 delete [] compressA;
1779 if( compressB != compressB_Buffer )
1780 delete [] compressB;
// Uncompressed data: frames are read directly as floats.
1783 float *frame, frame_buffer[REASONABLE_FRAME_SIZE];
1785 if (num_values > REASONABLE_FRAME_SIZE)
1786 frame =
new float[num_values];
1788 frame = frame_buffer;
1790 int first_channel = time_included?1:0;
1791 for( i=0; i<new_frames; i++ ){
1792 if( fread( frame,
sizeof(
float), num_values, fp ) != (
size_t) num_values ){
1793 cerr <<
"Could not read data from htk track file" << endl;
1795 if (frame != frame_buffer)
1797 return misc_read_error;
1800 swap_bytes_float( frame, num_values );
1803 tmp.
t(i) = frame[0];
1805 for( j=0; j<num_channels; ++j )
1806 tmp.
a(i, j) = frame[j+first_channel];
1811 if( frame != frame_buffer )
// Channel naming: base coefficients first, then energy, delta and
// acceleration groups depending on the qualifier bits.
1819 for (i=0;i<order;i++)
1828 if ( (samp_type & HTK_ENERGY) && !(samp_type & HTK_NO_E) )
1832 if (samp_type & HTK_DELTA){
1833 for (j = 0; j < order; j++){
1839 if (samp_type & HTK_ENERGY)
1844 if (samp_type & HTK_AC){
1845 for(j=0;j<order;j++){
1846 t =
EST_String(
"ac")+ itoString(j+1)+
"_d_d";
1850 if (samp_type & HTK_ENERGY)
// After naming, i must equal the track's channel count.
1855 if (i != num_channels){
1856 cerr <<
"Something went horribly wrong - wanted " << num_values
1857 <<
" channels in track but got " << i << endl;
1859 return wrong_format;
// Record the contour type chosen in the switch above and tag the track as HTK.
1861 tmp.f_set(
"contour_type",pname);
1863 tmp.set_file_type(tff_htk);
1880 { channel_voiced, 1 },
1881 { channel_power, 2},
1883 { channel_unknown, 0}
1896 f0_track.assign_map(ESPSF0TrackMap);
1905 f0_track.
a(i, channel_voiced) = track.
track_break(i) ? 0.1 : 1.2;
1906 f0_track.
a(i, channel_f0) = track.
track_break(i) ? 0.0: track.
a(i,0);
1909 f0_track.set_file_type(tff_esps);
1944 if (fz.channel_name(i) ==
"prob_voice")
1949 if (fz.channel_name(i) ==
"F0")
1957 if (fz.
a(i, f) < 1.0)
1964 if (fz.
a(i, p) < 0.5)
1980 int ncoefs, nchannels;
2001 for (
int c = 0; c < ncoefs; c++)
2003 lpc.
a(i, c) = track.
a(i, channel_lpc_0, c);
2004 lpc.
t(i) = track.
t(i);
2010 for(
int ii = 0; ii< track.
num_frames(); ii++)
2011 lpc.
a(ii, ncoefs) = track.
a(ii, channel_power);
2020 for (
EST_Litem *p = tlist.head(); p ; p = p->next())
2021 tlist(p).save(tlist(p).name(), otype);
2032 for (p = files.head(); p; p = p->next())
2036 if (read_track(tlist(plp), files(p), al) != format_ok)
2039 tlist(plp).set_name(files(p));
2052 startt = al.
fval(
"-startt" );
2055 ishift = al.
fval(
"ishift");
2057 ishift = al.
fval(
"-s");
2058 else if (al.
present(
"time_channel"))
2063 if (tr.
load(in_file, al.
val(
"-itype", 0), ishift, startt) != format_ok)
2068 if (tr.
load(in_file, ishift, startt ) != format_ok)
// Build a short listing of track file formats by walking every entry of
// EST_TrackFile::map and looking up its name. How the names are joined and
// returned is on lines outside this view.
2087 EST_String EST_TrackFile::options_short(
void)
2091 for(
int n=0; n< EST_TrackFile::map.n() ; n++)
// Name registered for the n-th token in the map.
2093 const char *nm = EST_TrackFile::map.name(EST_TrackFile::map.token(n));
// Build a descriptive listing of track file formats: a heading followed by
// information drawn from each entry's name and description in
// EST_TrackFile::map. The formatting of each entry and the return are on
// lines outside this view.
2104 EST_String EST_TrackFile::options_supported(
void)
// NOTE(review): the heading reads "AvailablE" (stray capital E); it is
// user-visible text and is left unchanged here.
2106 EST_String s(
"AvailablE track file formats:\n");
2108 for(
int n=0; n< EST_TrackFile::map.n() ; n++)
2110 const char *nm = EST_TrackFile::map.name(EST_TrackFile::map.token(n));
2111 const char *d = EST_TrackFile::map.info(EST_TrackFile::map.token(n)).description;
2121 { tff_none, {
"none" },
2123 "unknown track file type"}},
2124 {tff_esps, {
"esps" },
2125 {TRUE, EST_TrackFile::load_esps, EST_TrackFile::save_esps,
2126 "entropic sps file"}},
2127 {tff_est_ascii, {
"est",
"est_ascii" },
2128 {TRUE, EST_TrackFile::load_est, EST_TrackFile::save_est_ascii,
2129 "Edinburgh Speech Tools track file"}},
2130 {tff_est_binary, {
"est_binary" },
2131 {TRUE, EST_TrackFile::load_est, EST_TrackFile::save_est_binary,
2132 "Edinburgh Speech Tools track file"}}
2134 {tff_htk, {
"htk" },
2135 {TRUE, EST_TrackFile::load_htk, EST_TrackFile::save_htk,
2140 {tff_htk_fbank, {
"htk_fbank" },
2141 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_fbank,
2142 "htk file (as FBANK)"}},
2143 {tff_htk_mfcc, {
"htk_mfcc" },
2144 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_mfcc,
2145 "htk file (as MFCC)"}},
2146 {tff_htk_mfcc_e, {
"htk_mfcc_e" },
2147 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_mfcc_e,
2148 "htk file (as MFCC_E)"}},
2149 {tff_htk_user, {
"htk_user" },
2150 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_user,
2151 "htk file (as USER)"}},
2152 {tff_htk_discrete, {
"htk_discrete" },
2153 {FALSE, EST_TrackFile::load_htk, EST_TrackFile::save_htk_discrete,
2154 "htk file (as DISCRETE)"}},
2155 {tff_ssff, {
"ssff"},
2156 {TRUE, EST_TrackFile::load_ssff, EST_TrackFile::save_ssff,
2157 "Macquarie University's Simple Signal File Format"}},
2158 {tff_xmg, {
"xmg" },
2159 {TRUE, EST_TrackFile::load_xmg, EST_TrackFile::save_xmg,
2160 "xmg file viewer"}},
2161 {tff_xgraph, {
"xgraph" },
2162 {FALSE, EST_TrackFile::load_xgraph, EST_TrackFile::save_xgraph,
2163 "xgraph display program format"}},
2164 {tff_ema, {
"ema" },
2165 {FALSE, EST_TrackFile::load_ema, NULL,
2167 {tff_ema_swapped, {
"ema_swapped" },
2168 {FALSE, EST_TrackFile::load_ema_swapped, NULL,
2170 {tff_ascii, {
"ascii" },
2171 {TRUE, EST_TrackFile::load_ascii, EST_TrackFile::save_ascii,
2172 "ascii decimal numbers"}},
2173 { tff_none, {
"none"}, {FALSE, NULL, NULL,
"unknown track file type"} }
2179 EST_TrackFile::TS_Info> track_ts_names[] =
2181 { tff_none, {
"none" },
2183 "unknown track file type"}},
2185 {tff_est_ascii, {
"est"},
2186 {TRUE, EST_TrackFile::load_est_ts, EST_TrackFile::save_est_ts,
2187 "Edinburgh Speech Tools track file"}},
2189 {tff_est_binary, {
"est_binary"},
2190 {TRUE, EST_TrackFile::load_est_ts, EST_TrackFile::save_est_binary_ts,
2191 "Edinburgh Speech Tools track file"}},
2193 {tff_ssff, {
"ssff"},
2194 {TRUE, EST_TrackFile::load_ssff_ts, EST_TrackFile::save_ssff_ts,
2195 "Macquarie University's Simple Signal File Format"}},
2197 { tff_none, {
"none" },
2199 "unknown track file type"}}
2203 EST_TrackFile::ts_map(track_ts_names);
2206 #if defined(INSTANTIATE_TEMPLATES)
2208 #include "../base_class/EST_TNamedEnum.cc"
2211 const char *, EST_TrackFile::Info>;
2214 const char *, EST_TrackFile::TS_Info>;