Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
slib_str.cc
1 /*
2  * COPYRIGHT (c) 1988-1994 BY *
3  * PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS. *
4  * See the source file SLIB.C for more information. *
5 
6  * Reorganization of files (Mar 1999) by Alan W Black <awb@cstr.ed.ac.uk>
7 
8  * String functions
9 
10 */
11 #include <cstdio>
12 #include <cstring>
13 #include <setjmp.h>
14 #include <cstdlib>
15 #include <cctype>
16 
17 #include "EST_Pathname.h"
18 #include "EST_cutils.h"
19 #include "siod.h"
20 #include "siodp.h"
21 
22 LISP strintern(const char *data)
23 {
24  return strcons(strlen(data),data);
25 }
26 
27 LISP strcons(long length,const char *data)
28 {long flag;
29  LISP s;
30  flag = no_interrupt(1);
31  s = cons(NIL,NIL);
32  s->type = tc_string;
33  s->storage_as.string.data = must_malloc(length+1);
34  s->storage_as.string.dim = length;
35  if (data)
36  memmove(s->storage_as.string.data,data,length+1);
37  no_interrupt(flag);
38  return(s);}
39 
40 LISP cstrcons(const char *data)
41 {long flag;
42  LISP s;
43  flag = no_interrupt(1);
44  s = cons(NIL,NIL);
45  s->type = tc_string;
46  s->storage_as.string.data = (char *)(void *)data;
47  s->storage_as.string.dim = strlen(data);
48  no_interrupt(flag);
49  return(s);}
50 
51 static int rfs_getc(unsigned char **p)
52 {int i;
53  i = **p;
54  if (!i) return(EOF);
55  *p = *p + 1;
56  return(i);}
57 
58 static void rfs_ungetc(unsigned char c,unsigned char **p)
59 {(void)c;
60  *p = *p - 1;}
61 
62 LISP read_from_lstring(LISP x)
63 {return read_from_string(get_c_string(x));}
64 
65 LISP read_from_string(const char *string)
66 {char *p,*q;
67  LISP r;
68  struct gen_readio s;
69  q = wstrdup(string);
70  p = q;
71  s.getc_fcn = (int (*)(char *))rfs_getc;
72  s.ungetc_fcn = (void (*)(int, char *))rfs_ungetc;
73  s.cb_argument = (char *) &p;
74  r = readtl(&s);
75  wfree(q);
76  return r;
77 }
78 
79 LISP string_append(LISP args)
80 {long size;
81  LISP l,s;
82  char *data;
83  size = 0;
84  for(l=args;NNULLP(l);l=cdr(l))
85  size += strlen(get_c_string(car(l)));
86  s = strcons(size,NULL);
87  data = s->storage_as.string.data;
88  data[0] = 0;
89  for(l=args;NNULLP(l);l=cdr(l))
90  strcat(data,get_c_string(car(l)));
91  return(s);}
92 
93 LISP string_length(LISP string)
94 {if NTYPEP(string,tc_string) err("not a string",string);
95  return(flocons((double)string->storage_as.string.dim));}
96 
97 LISP parse_number(LISP x)
98 {const char *c;
99  c = get_c_string(x);
100  return(flocons(atof(c)));}
101 
102 LISP string_downcase(LISP symbol)
103 {
104  const char *symname = get_c_string(symbol);
105  char *downsym = wstrdup(symname);
106  LISP newsym;
107  int i;
108 
109  for (i=0; symname[i] != '\0'; i++)
110  if (isupper(symname[i]))
111  downsym[i] = tolower(symname[i]);
112  else
113  downsym[i] = symname[i];
114  downsym[i] = '\0';
115  newsym = strintern(downsym);
116  wfree(downsym);
117 
118  return newsym;
119 }
120 
121 LISP string_upcase(LISP symbol)
122 {
123  const char *symname = get_c_string(symbol);
124  char *upsym = wstrdup(symname);
125  LISP newsym;
126  int i;
127 
128  for (i=0; symname[i] != '\0'; i++)
129  if (islower(symname[i]))
130  upsym[i] = toupper(symname[i]);
131  else
132  upsym[i] = symname[i];
133  upsym[i] = '\0';
134  newsym = strintern(upsym);
135  wfree(upsym);
136 
137  return newsym;
138 }
139 
140 LISP path_is_dirname(LISP lpath)
141 {
142  EST_Pathname path(get_c_string(lpath));
143 
144  return path.is_dirname()?lpath:NIL;
145 }
146 
147 LISP path_is_filename(LISP lpath)
148 {
149  EST_Pathname path(get_c_string(lpath));
150 
151  return path.is_filename()?lpath:NIL;
152 }
153 
154 LISP path_as_directory(LISP lpath)
155 {
156  EST_Pathname path(get_c_string(lpath));
157  EST_Pathname res(path.as_directory());
158  return strintern(res);
159 }
160 
161 LISP path_as_file(LISP lpath)
162 {
163  EST_Pathname path(get_c_string(lpath));
164  EST_Pathname res(path.as_file());
165 
166  return strintern(res);
167 }
168 
169 LISP path_append(LISP lpaths)
170 {
171  if (CONSP(lpaths))
172  {
173  EST_Pathname res(get_c_string(car(lpaths)));
174  lpaths = cdr(lpaths);
175  while(lpaths != NIL)
176  {
177  res = res +get_c_string(car(lpaths));
178  lpaths = cdr(lpaths);
179  }
180  return strintern(res);
181  }
182  return NIL;
183 }
184 
185 LISP path_basename(LISP lpath)
186 {
187  EST_Pathname path(get_c_string(lpath));
188  EST_Pathname res(path.basename(1));
189 
190  return strintern(res);
191 }
192 
193 LISP symbol_basename(LISP path, LISP suffix)
194 {
195  // Like UNIX basename
196  const char *pathstr = get_c_string(path);
197  const char *suff;
198  char *bname;
199  int i, j, k, start, end;
200  LISP newsym;
201 
202  if (suffix == NIL)
203  suff = "";
204  else
205  suff = get_c_string(suffix);
206 
207  for (i=strlen(pathstr); i >= 0; i--)
208  if (pathstr[i] == '/')
209  break;
210  start = i+1;
211  for (j=strlen(pathstr),k=strlen(suff); k >= 0; k--,j--)
212  if (pathstr[j] != suff[k])
213  break;
214  if (k != -1)
215  end = strlen(pathstr);
216  else
217  end = j+1;
218 
219  bname = walloc(char,end-start+1);
220  memcpy(bname,&pathstr[start],end-start);
221  bname[end-start] = '\0';
222  newsym = strcons(strlen(bname),bname);
223  wfree(bname);
224 
225  return newsym;
226 }
227 
228 
229 static LISP lisp_to_string(LISP l)
230 {
231  EST_String s;
232 
233  s = siod_sprint(l);
234  printf("%s\n",(const char *)s);
235  return strintern(s);
236 }
237 
238 static LISP symbolconc(LISP args)
239 {long size;
240  LISP l,s;
241  size = 0;
242  tkbuffer[0] = 0;
243  for(l=args;NNULLP(l);l=cdr(l))
244  {s = car(l);
245  if NSYMBOLP(s) err("wrong type of argument(non-symbol) to symbolconc",s);
246  size = size + strlen(PNAME(s));
247  if (size > TKBUFFERN) err("symbolconc buffer overflow",NIL);
248  strcat(tkbuffer,PNAME(s));}
249  return(rintern(tkbuffer));}
250 
251 LISP symbolexplode(LISP name)
252 {
253  LISP e=NIL;
254  const char *pname = get_c_string(name);
255  char tt[2];
256  int i;
257 
258  tt[1]='\0';
259 
260  for (i=0; pname[i] != '\0'; i++)
261  {
262  tt[0] = pname[i];
263  e = cons(rintern(tt),e);
264  }
265  return reverse(e);
266 }
267 
268 LISP l_matches(LISP atom, LISP regex)
269 {
270  // t if printname of atom matches regex, nil otherwise
271  const EST_String pname = get_c_string(atom);
272 
273  if (pname.matches(make_regex(get_c_string(regex))) == TRUE)
274  return truth;
275  else
276  return NIL;
277 }
278 
279 LISP l_strequal(LISP atom1, LISP atom2)
280 {
281 
282  if (streq(get_c_string(atom1),get_c_string(atom2)))
283  return truth;
284  else
285  return NIL;
286 }
287 
288 LISP l_substring(LISP string, LISP l_start, LISP l_length)
289 {
290  // As string might actually be a buffer containing nulls we
291  // do this a little carefully.
292  if (NTYPEP(string,tc_string))
293  err("not a string",string);
294 
295  const char *data = string->storage_as.string.data;
296  int dim = string->storage_as.string.dim;
297 
298  int start = ( get_c_int(l_start) < dim ? get_c_int(l_start) : dim );
299  int length = ( (get_c_int(l_length) + start) < dim ?
300  get_c_int(l_length)
301  : dim-start
302  );
303 
304  char *nbuffer = walloc(char, length+1);
305  memmove(nbuffer,data+start,length);
306  nbuffer[length] = '\0';
307 
308  LISP ncell = strcons(length, nbuffer);
309 
310  wfree(nbuffer);
311 
312  return ncell;
313 }
314 
315 static LISP l_sbefore(LISP atom, LISP before)
316 {
317  // Wraparound for EST_String.before function
318  EST_String pname = get_c_string(atom);
319  EST_String b = get_c_string(before);
320  EST_String n = pname.before(b);
321 
322  return strintern(n);
323 }
324 
325 static LISP l_safter(LISP atom, LISP after)
326 {
327  // Wraparound for EST_String.after function
328  EST_String pname = get_c_string(atom);
329  EST_String a = get_c_string(after);
330  EST_String n = pname.after(a);
331 
332  return strintern(n);
333 }
334 
335 void init_subrs_str(void)
336 {
337  init_lsubr("string-append",string_append,
338  "(string-append STR1 STR2 ...)\n\
339  Return a string made from the concatenation of the print names of STR1\n\
340  STR2 ...");
341  init_subr_1("string-length",string_length,
342  "(string-length SYMBOL)\n\
343  Return the number of characters in the print name of SYMBOL.");
344  init_subr_1("print_string",lisp_to_string,
345  "(print_string DATA)\n\
346  Returns a string representing the printing of DATA." );
347  init_subr_1("read-from-string",read_from_lstring,
348  "(read-from-string SYMBOL)\n\
349  Return first s-expression in print name of SYMBOL.");
350  init_subr_1("downcase",string_downcase,
351  "(downcase SYMBOL)\n\
352  Returns a string with the downcased version of SYMBOL's printname.");
353  init_subr_1("upcase",string_upcase,
354  "(upcase SYMBOL)\n\
355  Returns a string with the upcased version of SYMBOL's printname.");
356  init_subr_2("string-matches",l_matches,
357  "(string-matches ATOM REGEX)\n\
358  Returns t if ATOM's printname matches the regular expression REGEX,\n\
359  otherwise it returns nil.");
360  init_subr_2("string-equal",l_strequal,
361  "(string-equal ATOM1 ATOM2)\n\
362  Returns t if ATOM's printname is equal to ATOM's print name, otherwise\n\
363  it returns nil.");
364  init_subr_3("substring", l_substring,
365  "(substring STRING START LENGTH)\n\
366  Return a substring of STRING starting at START of length LENGTH.");
367  init_subr_2("string-before",l_sbefore,
368  "(string-before ATOM BEFORE)\n\
369  Returns an atom whose printname is the substring of ATOM's printname \n\
370  which appears before BEFORE. This is a wraparound for the EST_String.before \n\
371  function in C++, and hence has the same conditions for boundary cases.");
372  init_subr_2("string-after",l_safter,
373  "(string-after ATOM AFTER)\n\
374  Returns an atom whose printname is the substring of ATOM's printname \n\
375  which appears after AFTER. This is a wraparound for the EST_String.after \n\
376  function in C++, and hence has the same conditions for boundary cases.");
377 
378  init_lsubr("symbolconc",symbolconc,
379  "(symbolconc SYMBOL1 SYMBOL2 ...)\n\
380  Form new symbol by concatenation of the print forms of each of SYMBOL1\n\
381  SYMBOL2 etc.");
382  init_subr_1("symbolexplode",symbolexplode,
383  "(symbolexplode SYMBOL)\n\
384  Returns list of atoms one for each character in the print name of SYMBOL.");
385 
386  init_subr_1("parse-number",parse_number,
387  "(parse-number SYMBOL)\n\
388  Returns a number form a symbol or string whose print name is a number.");
389 
390  init_subr_2("basename",symbol_basename,
391  "(basename PATH SUFFIX)\n\
392  Return a string with directory removed from basename. If SUFFIX is\n\
393  specified remove that from end of PATH. Basically the same function\n\
394  as the UNIX command of the same name.");
395 
396 
397  init_subr_1("path-is-filename", path_is_filename,
398  "(path-is-filename PATHNAME)\n\
399  Is PATH a non-directory name.");
400 
401  init_subr_1("path-as-directory", path_as_directory,
402  "(path-as-directory PATHNAME)\n\
403  Return PATH as a directory name.");
404 
405  init_subr_1("path-as-file", path_as_file,
406  "(path-as-file PATHNAME)\n\
407  Return PATH as a non-directory name.");
408 
409  init_lsubr("path-append", path_append,
410  "(path-append DIRECTORY-PATH ADDITION1 ADDITION2 ...)\n\
411  Return a the path for ADDITION in DIRECTORY.");
412 
413  init_subr_1("path-basename", path_basename,
414  "(path-basename PATHNAME)\n\
415  Return name part of PATH.");
416 
417 
418  init_subr_1("path-is-dirname", path_is_dirname,
419  "(path-is-dirname PATHNAME)\n\
420  Is PATH a directory name.");
421 
422 }