Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
EST_Regex.h
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1997 */
6  /* All Rights Reserved. */
7  /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /************************************************************************/
33 
34 #ifndef __EST_REGEX_H__
35 #define __EST_REGEX_H__
36 
37 class EST_Regex;
38 
39 #include "EST_String.h"
40 
41 /** A Regular expression class to go with the CSTR EST_String class.
42  *
43  * The regular expression syntax is the FSF syntax used in emacs and
44  * in the FSF String library. This is translated into the syntax supported
45  * by Henry Spensor's regular expression library, this translation is a place
46  * to look if you find regular expressions not matching where expected.
47  *
48  * @see EST_String
49  * @see string_example
50  * @author Richard Caley <rjc@cstr.ed.ac.uk>
51  * @author (regular expression library by Henry Spencer, University of Toronto)
52  * @version $Id: EST_Regex.h,v 1.3 2004/05/04 00:00:16 awb Exp $
53  */
54 
55 class EST_Regex : protected EST_String {
56 
57 private:
58  /// The compiled form.
59  void *compiled;
60  /// Compiled form for whole string match.
61  void *compiled_match;
62 
63 protected:
64  /// Compile expression.
65  void compile();
66  /// Compile expression in a form which only matches whole string.
67  void compile_match();
68  /// Translate the expression into the internally used syntax.
69  char *regularize(int match) const;
70 
71 public:
72  /// Empty constructor, just for form.
73  EST_Regex(void);
74 
75  /// Construct from EST_String.
77 
78  /// Construct from C string.
79  EST_Regex(const char *ex);
80 
81  /// Copy constructor.
82  EST_Regex(const EST_Regex &ex);
83 
84  /// Destructor.
85  ~EST_Regex();
86 
87  /// Size of the expression.
88  int size() const { return EST_String::size; };
89 
90  /// Run to find a matching substring
91  int run(const char *on, int from, int &start, int &end, int *starts=NULL, int *ends=NULL);
92  /// Run to see if it matches the entire string.
93  int run_match(const char *on, int from=0, int *starts=NULL, int *ends=NULL);
94 
95  /// Get the expression as a string.
96  EST_String tostring(void) const {return (*this);};
97 
98  /// Cast operator, disambiguates it for some compilers
99  operator const char *() const { return (const char *)tostring(); }
100 
101  int operator == (const EST_Regex ex) const
102  { return (const EST_String)*this == (const EST_String)ex; }
103 
104  int operator != (const EST_Regex ex) const
105  { return (const EST_String)*this != (const EST_String)ex; }
106 
107  /**@name Assignment */
108  //@{
109  ///
110  EST_Regex &operator = (const EST_Regex ex);
111  ///
112  EST_Regex &operator = (const EST_String s);
113  ///
114  EST_Regex &operator = (const char *s);
115  //@}
116 
117  /// Stream output of regular expression.
118  friend ostream &operator << (ostream &s, const EST_Regex &str);
119 };
120 
121 ostream &operator << (ostream &s, const EST_Regex &str);
122 
123 /**@name Predefined_regular_expressions
124  * Some regular expressions matching common things are predefined
125  */
126 //@{
127 /// White space
128 extern EST_Regex RXwhite; // "[ \n\t\r]+"
129 /// Sequence of alphabetic characters.
130 extern EST_Regex RXalpha; // "[A-Za-z]+"
131 /// Sequence of lower case alphabetic characters.
132 extern EST_Regex RXlowercase; // "[a-z]+"
133 /// Sequence of upper case alphabetic characters.
134 extern EST_Regex RXuppercase; // "[A-Z]+"
135 /// Sequence of letters and/or digits.
136 extern EST_Regex RXalphanum; // "[0-9A-Za-z]+"
137 /// Initial letter or underscore followed by letters underscores or digits.
138 extern EST_Regex RXidentifier; // "[A-Za-z_][0-9A-Za-z_]+"
139 /// Integer.
140 extern EST_Regex RXint; // "-?[0-9]+"
141 /// Floating point number.
142 extern EST_Regex RXdouble; // "-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?"
143 //@}
144 
// GCC lets us use a static constant to declare arrays, Sun CC
// doesn't, so for a quiet, if ugly, life we declare it here with a suitable
// value and check in EST_Regex.cc to make sure it's OK.
//
// Deliberately a macro rather than a static constant, so that it can be
// used as an array bound on every supported compiler.

#define EST_Regex_max_subexpressions 10
150 
151 #endif