Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
io.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /************************************************************************/
33  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
34  /* Date: Tue Jun 10 1997 */
35  /************************************************************************/
36  /* */
37  /* Functions to open file descriptors for various kinds of data */
38  /* sources and sinks. */
39  /* */
40  /************************************************************************/
41 
42 #include <cstdlib>
43 #include <cstdio>
44 #include <fcntl.h>
45 #include "EST_unix.h"
46 #include "EST_socket.h"
47 
48 #include <sys/types.h>
49 
50 #include "EST_String.h"
51 #include "EST_bool.h"
52 #include "siod.h"
53 #include "siodp.h"
54 #include "io.h"
55 
56 EST_Regex RxURL("\\([a-z]+\\)://?\\([^/:]+\\)\\(:\\([0-9]+\\)\\)?\\(.*\\)");
57 EST_Regex RxFILEURL("file:.*");
58 static EST_Regex ipnum("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+");
59 
60 const int default_http_port = 80;
61 //const int default_ftp_port = 21;
62 
63 #define MAX_LINE_LENGTH (256)
64 
/*
 * Convert a textual port into a port number in HOST byte order.
 *
 * port may be a service name known to getservbyname() for "tcp"
 * (e.g. "http") or a decimal number.
 *
 * Returns the port number, or -1 when port is NULL, empty, not a
 * complete decimal number, or outside the range 0..65535.
 */
static int port_to_int(const char *port)
{
    struct servent *serv;
    char *end;
    long value;

    if (!port || *port == '\0')
        return -1;

    // servent::s_port is stored in network byte order.  The value returned
    // here is later passed through htons() by connect_to_server(), so it
    // has to be converted back to host order first.
    if ((serv = getservbyname(port, "tcp")))
        return ntohs((unsigned short)serv->s_port);

    // Unlike atoi(), reject trailing garbage and out-of-range values
    // instead of quietly turning them into some other port.
    value = strtol(port, &end, 10);
    if (end == port || *end != '\0' || value < 0 || value > 65535)
        return -1;

    return (int)value;
}
77 
78 int parse_url(const EST_String &url,
79  EST_String &protocol,
80  EST_String &host,
81  EST_String &port,
82  EST_String &path)
83 {
84  EST_String bitpath;
85  int start_of_bracket[EST_Regex_max_subexpressions];
86  int end_of_bracket[EST_Regex_max_subexpressions];
87 
88  if (url.matches(RxFILEURL,0,start_of_bracket, end_of_bracket))
89  {
90  protocol = "file";
91  host = "";
92  port = "";
93  path = url.after("file:");
94  return TRUE;
95  }
96  else if (!url.matches(RxURL, 0, start_of_bracket, end_of_bracket))
97  return FALSE;
98 
99  protocol = url.at(start_of_bracket[1], end_of_bracket[1]-start_of_bracket[1]);
100  host = url.at(start_of_bracket[2], end_of_bracket[2]-start_of_bracket[2]);
101  port = url.at(start_of_bracket[4], end_of_bracket[4]-start_of_bracket[4]);
102  bitpath = url.at(start_of_bracket[5], end_of_bracket[5]-start_of_bracket[5]);
103 
104  if (protocol == "http")
105  path = protocol + "://" + host + bitpath;
106  else
107  path = bitpath;
108 
109  return TRUE;
110 }
111 
112 static int connect_to_server(const char *host, int port)
113 {
114  struct sockaddr_in address;
115  struct hostent *hostentp;
116  EST_String shost=host;
117  int s;
118 
119  memset(&address, 0, sizeof(address));
120 
121  if (shost.matches(ipnum))
122  {
123  address.sin_addr.s_addr = inet_addr(host);
124  address.sin_family = AF_INET;
125  }
126  else if ((hostentp=gethostbyname(host))==NULL)
127  err("can't find host", host);
128  else
129  {
130  memset(&(address.sin_addr),0,sizeof(struct in_addr));
131  address.sin_family=hostentp->h_addrtype;
132  memmove(&address.sin_addr,
133  (hostentp->h_addr_list)[0],
134  hostentp->h_length);
135  }
136  address.sin_port=htons(port);
137 
138  if ((s=socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
139  err("can't create socket", NIL);
140 
141  if (connect(s, (struct sockaddr *)&address, sizeof(address)) < 0)
142  {
143  close(s);
144  err("can't connect to host",
145  inet_ntoa(address.sin_addr));
146  }
147 
148  return s;
149 }
150 
151 static void server_send(int s, const char *text)
152 {
153  size_t n=strlen(text);
154  ssize_t sent;
155 
156  while (n>0)
157  if ((sent = write(s, text, n))<0)
158  err("error talking to server", NIL);
159  else
160  n -= sent;
161 }
162 
163 static const char *server_get_line(int s)
164 {
165  static char buffer[MAX_LINE_LENGTH+1];
166  char *p=buffer;
167  ssize_t n;
168 
169  *p='\0';
170 
171  while(1==1)
172  if ((n=read(s, p, 1)) == 0)
173  break;
174  else if (n < 0)
175  err("error while reading from server", NIL);
176  else if (*(p++) == '\n')
177  break;
178 
179  *p = '\0';
180 
181  return buffer;
182 }
183 
184 
185 /*
186  * Open stdin or stdout. Should this do a dup?
187  */
188 
189 int fd_open_stdinout(const char *r_or_w)
190 {
191  int fd = -1;
192 
193  if (r_or_w[0] == 'r')
194  fd = fileno(stdin);
195  else if (r_or_w[0] == 'w')
196  fd = fileno(stdout);
197  else
198  err("mode not understood for -", r_or_w);
199  return fd;
200 }
201 
202 /*
203  * Duplicates the fopen interpretation of the type
204  * parameter plus "rw" being a synonym for "r+" to preserve
205  * some scheme semantics.
206  */
207 int fd_open_file(const char *name, const char *r_or_w)
208 {
209  int fd;
210  int mode=0;
211  int go_to_end=0;
212 
213  if (strcmp(name, "-")==0)
214  return fd_open_stdinout(r_or_w);
215 
216  if (r_or_w[0] == 'r')
217  if (r_or_w[1] == '+' || r_or_w[1] == 'w')
218  mode = O_RDWR|O_CREAT;
219  else
220  mode = O_RDONLY;
221  else if (r_or_w[0] == 'w')
222  if (r_or_w[1] == '+')
223  mode = O_RDWR|O_CREAT|O_TRUNC;
224  else
225  mode = O_WRONLY|O_CREAT|O_TRUNC;
226  else if (r_or_w[0] == 'a')
227  if (r_or_w[1] == '+')
228  go_to_end = mode = O_RDWR;
229  else
230  go_to_end = mode = O_WRONLY|O_CREAT;
231  else
232  err("mode not understood", r_or_w);
233 
234  /* Should deal with `b' here for binary files.
235  */
236 
237  fd= open(name, mode, 0666);
238 
239  if (fd >=0 && go_to_end)
240  lseek(fd, 0, SEEK_END);
241 
242  return fd;
243 }
244 
245 int fd_open_http(const char *host,
246  int port,
247  const char *path,
248  const char *r_or_w)
249 {
250  int s;
251 
252  if (port <0)
253  port=default_http_port;
254 
255  if ((s=connect_to_server(host, port)) < 0)
256  return s;
257 
258  if (*r_or_w == 'r')
259  {
260  const char *line;
261  float http_version;
262  int code;
263  char location[1024] = "";
264 
265  server_send(s, "GET ");
266  server_send(s, path);
267  server_send(s, " HTTP/1.0\n\n");
268  shutdown(s, 1);
269 
270  line= server_get_line(s);
271 
272  if (sscanf(line, "HTTP/%f %d", &http_version, &code) != 2)
273  {
274  close(s);
275  err("HTTP error", line);
276  }
277 
278  // Skip rest of header.
279  while((line = server_get_line(s)))
280  {
281  if (*line=='\r' || *line == '\n' || *line == '\0')
282  break;
283  else if (sscanf(line, "Location: %s", location) == 1)
284  {
285  cout << "redirect to '" << location << "'\n";
286  }
287  }
288 
289  if (code == 301 || code == 302)
290  {
291  close(s);
292 
293  if (*location == '\0')
294  err("Redirection to no loction", NIL);
295 
296 
297  EST_String sprotocol, shost, sport, spath;
298 
299  if (!parse_url(location, sprotocol, shost, sport, spath))
300  err("redirection to bad URL", location);
301 
302  s = fd_open_url(sprotocol, shost, sport, spath, "rb");
303  }
304 
305  }
306  else if (*r_or_w == 'w')
307  err("Write to HTTP url not yet implemented", NIL);
308 
309  return s;
310 }
311 
/*
 * Placeholder for FTP access: none of the arguments are used and the
 * call always fails by returning -1.
 */
int fd_open_ftp(const char *host,
                int port,
                const char *path,
                const char *r_or_w)
{
    const int not_available = -1;

    // Mark every parameter as deliberately unused.
    (void)r_or_w;
    (void)path;
    (void)port;
    (void)host;

    return not_available;
}
324 
325 int fd_open_tcp(const char *host,
326  int port,
327  const char *text,
328  const char *r_or_w)
329 {
330  int s;
331 
332  if (port <0)
333  return -1;
334 
335  if ((s=connect_to_server(host, port)) < 0)
336  return s;
337 
338  server_send(s, text);
339 
340  if (*r_or_w == 'r')
341  shutdown(s, 1);
342  else if (*r_or_w == 'w')
343  shutdown(s, 0);
344 
345  return s;
346 }
347 
348 /*
349  * Open a stream to a URL.
350  */
351 
352 int fd_open_url(const char *protocol,
353  const char *host,
354  const char *port,
355  const char *path,
356  const char *r_or_w)
357 {
358  // special case for local file URLs
359  if (strcmp(protocol, "file") == 0
360  && (!host || *host == '\0')
361  && (!port || *port == '\0'))
362  return fd_open_file(path, r_or_w);
363  else if (strcmp(protocol, "file") == 0 || strcmp(protocol, "ftp") == 0)
364  return fd_open_ftp(host, port_to_int(port), path, r_or_w);
365  else if (strcmp(protocol, "http") == 0)
366  return fd_open_http(host, port_to_int(port), path, r_or_w);
367  else if (strcmp(protocol, "tcp") == 0)
368  return fd_open_tcp(host, port_to_int(port), path, r_or_w);
369  else
370  return -1;
371 }