130 #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
131 #define OPERAND(p) ((p) + 3)
142 #define UCHARAT(p) ((int)*(unsigned char *)(p))
144 #define UCHARAT(p) ((int)*(p)&CHARBITS)
147 #define FAIL(m) { hs_regerror(m); return(NULL); }
148 #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
161 static const char *regparse;
163 static char regdummy;
164 static char *regcode;
171 #define STATIC static
173 STATIC
char *reg(
int paren,
int *flagp);
174 STATIC
char *regbranch(
int *flagp);
175 STATIC
char *regpiece(
int *flagp);
176 STATIC
char *regatom(
int *flagp);
177 STATIC
char *regnode(
char op);
178 STATIC
char *regnext(
register char *p);
179 STATIC
void regc(
char b);
180 STATIC
void reginsert(
char op,
char *opnd);
181 STATIC
void regtail(
char *p,
char *val);
182 STATIC
void regoptail(
char *p,
char *val);
184 STATIC
int strcspn();
203 hs_regcomp(
const char *exp)
211 FAIL(
"NULL argument");
215 if (exp[0] ==
'.' && exp[1] ==
'*') exp += 2;
222 if (reg(0, &flags) == NULL)
226 if (regsize >= 32767L)
227 FAIL(
"regexp too big");
232 FAIL(
"out of space");
237 regcode = r->program;
239 if (reg(0, &flags) == NULL)
249 if (OP(regnext(scan)) == END) {
250 scan = OPERAND(scan);
253 if (OP(scan) == EXACTLY)
254 r->regstart = *OPERAND(scan);
255 else if (OP(scan) == BOL)
269 for (; scan != NULL; scan = regnext(scan))
270 if ((OP(scan) == EXACTLY) &&
271 (strlen(OPERAND(scan)) >= len)) {
272 longest = OPERAND(scan);
273 len = strlen(OPERAND(scan));
275 r->regmust = longest;
293 reg(
int paren,
int *flagp)
307 if (regnpar >= NSUBEXP)
311 ret = regnode(OPEN+parno);
316 br = regbranch(&flags);
323 if (!(flags&HASWIDTH))
325 *flagp |= flags&SPSTART;
326 while (*regparse ==
'|' || *regparse ==
'\n') {
328 br = regbranch(&flags);
332 if (!(flags&HASWIDTH))
334 *flagp |= flags&SPSTART;
338 ender = regnode((paren) ? CLOSE+parno : END);
342 for (br = ret; br != NULL; br = regnext(br))
343 regoptail(br, ender);
346 if (paren && *regparse++ !=
')') {
347 FAIL(
"unmatched ()");
348 }
else if (!paren && *regparse !=
'\0') {
349 if (*regparse ==
')') {
350 FAIL(
"unmatched ()");
365 regbranch(
int *flagp)
374 ret = regnode(BRANCH);
376 while (*regparse !=
'\0' && *regparse !=
')' &&
377 *regparse !=
'\n' && *regparse !=
'|') {
378 latest = regpiece(&flags);
381 *flagp |= flags&HASWIDTH;
383 *flagp |= flags&SPSTART;
385 regtail(chain, latest);
389 (void) regnode(NOTHING);
411 ret = regatom(&flags);
421 if (!(flags&HASWIDTH) && op !=
'?')
422 FAIL(
"*+ operand could be empty");
423 *flagp = (op !=
'+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
425 if (op ==
'*' && (flags&SIMPLE))
426 reginsert(STAR, ret);
427 else if (op ==
'*') {
429 reginsert(BRANCH, ret);
430 regoptail(ret, regnode(BACK));
432 regtail(ret, regnode(BRANCH));
433 regtail(ret, regnode(NOTHING));
434 }
else if (op ==
'+' && (flags&SIMPLE))
435 reginsert(PLUS, ret);
436 else if (op ==
'+') {
438 next = regnode(BRANCH);
440 regtail(regnode(BACK), ret);
441 regtail(next, regnode(BRANCH));
442 regtail(ret, regnode(NOTHING));
443 }
else if (op ==
'?') {
445 reginsert(BRANCH, ret);
446 regtail(ret, regnode(BRANCH));
447 next = regnode(NOTHING);
449 regoptail(ret, next);
452 if (ISMULT(*regparse))
474 switch (*regparse++) {
484 *flagp |= HASWIDTH|SIMPLE;
490 if (*regparse ==
'^') {
491 ret = regnode(ANYBUT);
494 ret = regnode(ANYOF);
495 if (*regparse ==
']' || *regparse ==
'-')
497 while (*regparse !=
'\0' && *regparse !=
']') {
498 if (*regparse ==
'-') {
500 if (*regparse ==
']' || *regparse ==
'\0')
503 class1 = UCHARAT(regparse-2)+1;
504 classend = UCHARAT(regparse);
505 if (class1 > classend+1)
506 FAIL(
"invalid [] range");
507 for (; class1 <= classend; class1++)
515 if (*regparse !=
']')
516 FAIL(
"unmatched []");
518 *flagp |= HASWIDTH|SIMPLE;
522 ret = reg(1, &flags);
525 *flagp |= flags&(HASWIDTH|SPSTART);
531 FAIL(
"internal urp");
536 FAIL(
"?+* follows nothing");
539 switch (*regparse++) {
544 ret = regnode(WORDA);
547 ret = regnode(WORDZ);
586 ret = regnode(EXACTLY);
587 for ( regprev = 0 ; ; ) {
595 case '.':
case '[':
case '(':
596 case ')':
case '|':
case '\n':
604 case '?':
case '+':
case '*':
613 switch (regparse[1]){
650 if (ret == ®dummy) {
670 if (regcode != ®dummy)
682 reginsert(
char op,
char *opnd)
688 if (regcode == ®dummy) {
709 regtail(
char *p,
char *val)
721 temp = regnext(scan);
727 if (OP(scan) == BACK)
731 *(scan+1) = (offset>>8)&0377;
732 *(scan+2) = offset&0377;
739 regoptail(
char *p,
char *val)
742 if (p == NULL || p == ®dummy || OP(p) != BRANCH)
744 regtail(OPERAND(p), val);
754 static const char *reginput;
755 static const char *regbol;
756 static const char **regstartp;
757 static const char **regendp;
762 STATIC
int regtry(
hs_regexp *prog,
const char *
string);
763 STATIC
int regmatch(
char *prog);
764 STATIC
int regrepeat(
char *p);
767 FILE *regnarrate = stdout;
769 STATIC
char *regprop(
char *scan);
776 hs_regexec(
const hs_regexp *prog,
const char *
string)
781 if (prog == NULL ||
string == NULL) {
782 hs_regerror(
"NULL parameter");
787 if (UCHARAT(prog->program) != MAGIC) {
788 hs_regerror(
"corrupted program");
793 if (prog->regmust != NULL) {
795 while ((s = strchr(s, prog->regmust[0])) != NULL) {
796 if (strncmp(s, prog->regmust, prog->regmlen) == 0)
805 regbol = (
char *)
string;
809 return(regtry((
hs_regexp *)prog, string));
813 if (prog->regstart !=
'\0')
815 while ((s = strchr(s, prog->regstart)) != NULL) {
825 }
while (*s++ !=
'\0');
835 regtry(
hs_regexp *prog,
const char *
string)
842 regstartp = (
const char **)prog->startp;
843 regendp = (
const char **)prog->endp;
847 for (i = NSUBEXP; i > 0; i--) {
851 if (regmatch(prog->program + 1)) {
852 prog->startp[0] = (
char *)
string;
853 prog->endp[0] = (
char *)reginput;
877 if (scan != NULL && regnarrate)
878 fprintf(regnarrate,
"%s(\n", regprop(scan));
880 while (scan != NULL) {
883 fprintf(regnarrate,
"%s...\n", regprop(scan));
885 next = regnext(scan);
889 if (reginput != regbol)
893 if (*reginput !=
'\0')
898 if ((!isalnum((
int)*reginput)) && *reginput !=
'_')
901 if (reginput > regbol &&
902 (isalnum((
int)reginput[-1]) || reginput[-1] ==
'_'))
907 if (isalnum((
int)*reginput) || *reginput ==
'_')
912 if (*reginput ==
'\0')
920 opnd = OPERAND(scan);
922 if (*opnd != *reginput)
925 if (len > 1 && strncmp(opnd, reginput, len) != 0)
931 if (*reginput ==
'\0' || strchr(OPERAND(scan), *reginput) == NULL)
936 if (*reginput ==
'\0' || strchr(OPERAND(scan), *reginput) != NULL)
956 no = OP(scan) - OPEN;
959 if (regmatch(next)) {
965 if (regstartp[no] == NULL)
966 regstartp[no] = save;
984 no = OP(scan) - CLOSE;
987 if (regmatch(next)) {
993 if (regendp[no] == NULL)
1003 if (OP(next) != BRANCH)
1004 next = OPERAND(scan);
1008 if (regmatch(OPERAND(scan)))
1011 scan = regnext(scan);
1012 }
while (scan != NULL && OP(scan) == BRANCH);
1030 if (OP(next) == EXACTLY)
1031 nextch = *OPERAND(next);
1032 min = (OP(scan) == STAR) ? 0 : 1;
1035 no = regrepeat(OPERAND(scan));
1038 if (nextch ==
'\0' || *reginput == nextch)
1045 reginput = save + no;
1054 hs_regerror(
"memory corruption");
1066 hs_regerror(
"corrupted pointers");
1084 count = strlen(scan);
1088 while (*opnd == *scan) {
1094 while (*scan !=
'\0' && strchr(opnd, *scan) != NULL) {
1100 while (*scan !=
'\0' && strchr(opnd, *scan) == NULL) {
1106 hs_regerror(
"internal foulup");
1138 STATIC
char *regprop(
char *scan);
1153 printf(
"%2d%s", s-r->program, regprop(s));
1158 printf(
"(%d)", (s-r->program)+(next-s));
1160 if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1162 while (*s !=
'\0') {
1172 if (r->regstart !=
'\0')
1173 printf(
"start `%c' ", r->regstart);
1175 printf(
"anchored ");
1176 if (r->regmust != NULL)
1177 printf(
"must have \"%s\"", r->regmust);
1188 static char buf[50];
1190 (void) strcpy(buf,
":");
1232 sprintf(buf+strlen(buf),
"OPEN%d", OP(op)-OPEN);
1244 sprintf(buf+strlen(buf),
"CLOSE%d", OP(op)-CLOSE);
1260 hs_regerror(
"corrupted opcode");
1264 (void) strcat(buf, p);
1291 for (scan1 = s1; *scan1 !=
'\0'; scan1++) {
1292 for (scan2 = s2; *scan2 !=
'\0';)
1293 if (*scan1 == *scan2++)