Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
EST_Regex.h
1
/************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1997 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/************************************************************************/
33
34
#ifndef __EST_REGEX_H__
35
#define __EST_REGEX_H__
36
37
// Forward declaration placed ahead of the EST_String.h include;
// presumably lets that header refer to EST_Regex -- TODO confirm.
class
EST_Regex
;
38
39
#include "EST_String.h"
40
41
/** A Regular expression class to go with the CSTR EST_String class.
42
*
43
* The regular expression syntax is the FSF syntax used in emacs and
44
* in the FSF String library. This is translated into the syntax supported
45
* by Henry Spencer's regular expression library; this translation is a place
46
* to look if you find regular expressions not matching where expected.
47
*
48
* @see EST_String
49
* @see string_example
50
* @author Richard Caley <rjc@cstr.ed.ac.uk>
51
* @author (regular expression library by Henry Spencer, University of Toronto)
52
* @version $Id: EST_Regex.h,v 1.3 2004/05/04 00:00:16 awb Exp $
53
*/
54
55
class
EST_Regex
:
protected
EST_String
{
56
57
private
:
58
/// The compiled form.
59
void
*compiled;
60
/// Compiled form for whole string match.
61
void
*compiled_match;
62
63
protected
:
64
/// Compile expression.
65
void
compile
();
66
/// Compile expression in a form which only matches whole string.
67
void
compile_match
();
68
/// Translate the expression into the internally used syntax.
69
char
*
regularize
(
int
match)
const
;
70
71
public
:
72
/// Empty constructor, just for form.
73
EST_Regex
(
void
);
74
75
/// Construct from EST_String.
76
EST_Regex
(
EST_String
s);
77
78
/// Construct from C string.
79
EST_Regex
(
const
char
*ex);
80
81
/// Copy constructor.
82
EST_Regex
(
const
EST_Regex
&ex);
83
84
/// Destructor.
85
~EST_Regex
();
86
87
/// Size of the expression.
88
int
size
()
const
{
return
EST_String::size; };
89
90
/// Run to find a matching substring
91
int
run
(
const
char
*on,
int
from,
int
&start,
int
&end,
int
*starts=NULL,
int
*ends=NULL);
92
/// Run to see if it matches the entire string.
93
int
run_match
(
const
char
*on,
int
from=0,
int
*starts=NULL,
int
*ends=NULL);
94
95
/// Get the expression as a string.
96
EST_String
tostring
(
void
)
const
{
return
(*
this
);};
97
98
/// Cast operator, disambiguates it for some compilers
99
operator
const
char
*()
const
{
return
(
const
char
*)
tostring
(); }
100
101
int
operator == (
const
EST_Regex
ex)
const
102
{
return
(
const
EST_String
)*
this
== (
const
EST_String
)ex; }
103
104
int
operator != (
const
EST_Regex
ex)
const
105
{
return
(
const
EST_String
)*
this
!= (
const
EST_String
)ex; }
106
107
/**@name Assignment */
108
//@{
109
///
110
EST_Regex
&operator = (
const
EST_Regex
ex);
111
///
112
EST_Regex
&operator = (
const
EST_String
s);
113
///
114
EST_Regex
&operator = (
const
char
*s);
115
//@}
116
117
/// Stream output of regular expression.
118
friend
ostream &
operator <<
(ostream &s,
const
EST_Regex
&
str
);
119
};
120
121
/// Stream output of a regular expression: namespace-scope declaration of
/// the operator that EST_Regex also declares as a friend.
ostream &operator << (ostream &s,
const
EST_Regex
&str);
122
123
/**@name Predefined_regular_expressions
124
* Some regular expressions matching common things are predefined
125
*/
126
//@{
127
/// White space
128
extern
EST_Regex
RXwhite;
// "[ \n\t\r]+"
129
/// Sequence of alphabetic characters.
130
extern
EST_Regex
RXalpha;
// "[A-Za-z]+"
131
/// Sequence of lower case alphabetic characters.
132
extern
EST_Regex
RXlowercase;
// "[a-z]+"
133
/// Sequence of upper case alphabetic characters.
134
extern
EST_Regex
RXuppercase;
// "[A-Z]+"
135
/// Sequence of letters and/or digits.
136
extern
EST_Regex
RXalphanum;
// "[0-9A-Za-z]+"
137
/// Initial letter or underscore followed by letters underscores or digits.
138
extern
EST_Regex
RXidentifier;
// "[A-Za-z_][0-9A-Za-z_]+"
139
/// Integer.
140
extern
EST_Regex
RXint;
// "-?[0-9]+"
141
/// Floating point number.
142
extern
EST_Regex
RXdouble;
// "-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?"
143
//@}
144
145
// GCC lets us use the static constant to declare arrays, Sun CC
146
// doesn't, so for a quiet, if ugly, life we declare it here with a suitable
147
// value and check in EST_Regex.cc to make sure it's OK
148
149
// Kept as a macro rather than a static constant so that it can size arrays
// under Sun CC as well as GCC; EST_Regex.cc checks the value.
#define EST_Regex_max_subexpressions 10
150
151
#endif
include
EST_Regex.h
Generated on Wed Dec 24 2014 09:16:35 for Edinburgh Speech Tools by
1.8.3.1