Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
token_regression.cc
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* Author: Alan W Black */
34 /* Date: May 1997 */
35 /************************************************************************/
36 /* */
37 /* Let's see if we can break the TokenStream class */
38 /* */
39 /************************************************************************/
40 
41 #include <cstdlib>
42 #include "EST_Token.h"
43 
44 static void binary_read_test();
45 
46 static void find_tokens(EST_TokenStream &ts)
47 {
48  // Count and display the tokens in this stream
49  int tokens;
50 
51  for (tokens=0; !ts.eof(); tokens++)
52  cout << ts.get().string() << endl;
53  cout << "Total: " << tokens << endl << endl;;
54 
55 }
56 
57 int main(int argc,char **argv)
58 {
59  // Simple program to read all the tokens in the named file
60  // a print a summary of them
61  (void)argc;
62  (void)argv;
63  EST_TokenStream ts;
64  EST_String s;
65 
66  // Basic tokenizing tasks changing punctuation, whitespace and
67  // single character symbols etc.
68  s = "This is a test.";
69  cout << "Test 1: " << quote_string(s) << endl;
70  ts.open_string(s);
71  find_tokens(ts);
72  ts.close();
73 
74  s = "This (is) a test.";
75  cout << "Test 2: " << quote_string(s) << endl;
76  ts.open_string(s);
77  find_tokens(ts);
78  ts.close();
79 
80  s = "This (is) a test.";
81  cout << "Test 3: " << quote_string(s) << endl;
82  ts.open_string("This (is) a test.");
83  ts.set_PrePunctuationSymbols("({[");
84  ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
85  find_tokens(ts);
86  ts.close();
87 
88  s = "This (is) a test.";
89  cout << "Test 4: " << quote_string(s) << endl;
90  ts.open_string(s);
91  ts.set_SingleCharSymbols("()");
92  ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
93  find_tokens(ts);
94  ts.close();
95 
96  s = "This \"is a\" te\\\"st.";
97  cout << "Test 5: " << quote_string(s) << endl;
98  ts.open_string(s);
99  ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
100  ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
101  find_tokens(ts);
102  ts.close();
103 
104  s = "This \"is a\" te\\\"st.";
105  cout << "Test 6: " << quote_string(s) << endl;
106  ts.open_string(s);
107  ts.set_quotes('"','\\');
108  find_tokens(ts);
109  ts.close();
110 
111  s = "This \"is \n\
112 a\" te\\\"st.";
113  cout << "Test 7: " << quote_string(s) << endl;
114  ts.open_string(s);
115  ts.set_quotes('"','\\');
116  find_tokens(ts);
117  ts.close();
118 
119  // test of reading binary data
120  binary_read_test();
121 
122  return 0;
123 }
124 
125 EST_String make_tokbins(const EST_String& filename)
126 {
127  FILE *fd;
128  char buff[64];
129  int a[2];
130  int numbytes;
131  // Make a buffer with both tokens and binary data
132  sprintf(buff,"a buffer BINARY ");
133  a[0] = 7;
134  a[1] = -34;
135  memmove(buff+16,a,sizeof(int)*2);
136  sprintf(buff+16+(sizeof(int)*2)," and tokens");
137 
138  if ((fd=fopen(filename,"w")) == NULL)
139  {
140  cerr << "Token_regression: failed to open " << filename << endl;
141  exit(-1);
142  }
143 
144  numbytes = fwrite(buff,1,16+(sizeof(int)*2)+11,fd);
145  fclose(fd);
146 
147  // Special constructions as the string contains nulls
148  return EST_String(buff,numbytes,0,numbytes);
149 }
150 
151 static void binary_read_test()
152 {
153  // You can use fread to read directly from a token stream
154  // but care should be take at the boundaries. Reading a
155  // token will always read the character following it. By
156  // convention it is recommended you include the single token
157  // BINARY follow by a single space in the stream before each
158  // binary section.
159  int b[2];
160  EST_String tokbinbuf;
161  EST_TokenStream ts;
162 
163  tokbinbuf = make_tokbins("tmp/tokbin.dat");
164 
165  // Do the reading
166 
167  cout << "Reading tokens and binary from string\n";
168 
169  ts.open_string(tokbinbuf);
170 
171  cout << ts.get() << endl;
172  cout << ts.get() << endl;
173  if (ts.get() != "BINARY")
174  {
175  cout << "failed to read binary data, missing BINARY token." << endl;
176  exit(-1);
177  }
178  ts.fread(b,sizeof(int),2);
179  cout << b[0] << endl;
180  cout << b[1] << endl;
181  cout << ts.get() << endl;
182  cout << ts.get() << endl;
183  ts.close();
184 
185  cout << "Reading tokens and binary from file\n";
186 
187  ts.open("tmp/tokbin.dat");
188 
189  cout << ts.get() << endl;
190  cout << ts.get() << endl;
191  if (ts.get() != "BINARY")
192  {
193  cout << "failed to read binary data, missing BINARY token." << endl;
194  exit(-1);
195  }
196  ts.fread(b,sizeof(int),2);
197  cout << b[0] << endl;
198  cout << b[1] << endl;
199  cout << ts.get() << endl;
200  cout << ts.get() << endl;
201  ts.close();
202 
203 }
204 
205