diff options
Diffstat (limited to '')
| -rw-r--r-- | ChangeLog | 25 | ||||
| -rw-r--r-- | NEWS | 5 | ||||
| -rw-r--r-- | regex.c | 625 | ||||
| -rw-r--r-- | regex.h | 35 | 
4 files changed, 532 insertions, 158 deletions
| @@ -1,3 +1,26 @@ +2002-01-10  aito@fw.ipsj.or.jp + +	* [w3m-dev 02811] new regexp implementation +	* regex.h: remove defines +	* regex.h: struct regexchar, struct regex +	* regex.c: include stdio.h, stdlib.h, string.h, config.h +	* regex.c: remove include fm.h +	* regex.c (NULL JP_CHARSET IS_ALPHA IS_KANJI1): define unless defined +	* regex.c (RE_*) define +	* regex.c (st_ptr): deleted +	* regex.c (regmatch): remove 4th arg `int' +	* regex.c (newRegex0): rewrite, based on old newRegex +	* regex.c (newRegex): use newRegex0() +	* regex.c (RegexMatch): rewrite +	* regex.c (struct MatchingContext1): added +	* regex.c (struct MatchingContext2): added +	* regex.c (YIELD): added +	* regex.c (regmatch_sub_anytime): added +	* regex.c (regmatch_iter): added +	* regex.c (regmatch): use regmatch_iter() +	* regex.c (regmatch1): rewrite +	* regex.c (lc2c): add 2nd arg `len', rewrite +  2002-01-10  Yoshinobu Sakane <sakane@d4.bsd.nes.nec.co.jp>  	* [w3m-dev 02810] @@ -1795,4 +1818,4 @@  	* release-0-2-1  	* import w3m-0.2.1 -$Id: ChangeLog,v 1.202 2002/01/10 03:45:13 ukai Exp $ +$Id: ChangeLog,v 1.203 2002/01/10 04:55:06 ukai Exp $ @@ -1,3 +1,8 @@ +w3m 0.3? + +* new regex implementation + +----------------------------------------------------------------  w3m 0.2.4 - 2002-01-07  * RFC2818 server identity check @@ -1,18 +1,60 @@ -/* $Id: regex.c,v 1.6 2001/11/30 10:10:24 ukai Exp $ */ +/* $Id: regex.c,v 1.7 2002/01/10 04:55:07 ukai Exp $ */  /*    * regex: Regular expression pattern match library   *    * by A.ITO, December 1989 + * Revised by A.ITO, January 2002   */  #ifdef REGEX_DEBUG  #include <sys/types.h>  #include <malloc.h>  #endif				/* REGEX_DEBUG */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h>  #include <ctype.h>  #include <gc.h> -#include "fm.h"  #include "regex.h" +#include "config.h" + +#ifndef NULL +#define NULL	0 +#endif				/* not NULL */ + +#if LANG == JA +#define JP_CHARSET +#endif + +#define RE_ITER_LIMIT   65535 + +#define RE_MATCHMODE	0x07 +#define	RE_NORMAL	0x00 +#define RE_ANY		0x01 +#define RE_WHICH	0x02 +#define RE_EXCEPT	0x03 +#define RE_SUBREGEX     0x04 +#define RE_BEGIN	0x05 +#define RE_END		0x06 +#define RE_ENDMARK	0x07 + +#define RE_OPT          0x08 +#define RE_ANYTIME	0x10 +#define RE_IGNCASE      0x40 + +#define RE_MODE(x)      ((x)->mode&RE_MATCHMODE) +#define RE_SET_MODE(x,v) ((x)->mode = (((x)->mode&~RE_MATCHMODE)|((v)&RE_MATCHMODE))) + +#ifdef REGEX_DEBUG +void debugre(regexchar *); +char *lc2c(longchar *, int); +int verbose; +#endif				/* REGEX_DEBUG */ + +#ifndef IS_ALPHA +#define IS_ALPHA(x) (!((x)&0x80) && isalpha(x)) +#define IS_KANJI1(x) ((x)&0x80) +#endif  #ifdef JP_CHARSET  #define RE_KANJI(p)	(((unsigned char)*(p) << 8) | (unsigned char)*((p)+1)) @@ -24,13 +66,10 @@ static Regex DefaultRegex;  #define CompiledRegex DefaultRegex.re  #define Cstorage DefaultRegex.storage -static longchar *st_ptr; - -static int regmatch(regexchar *, char *, int, int, char **); +static int regmatch(regexchar *, char *, int, char **);  static int regmatch1(regexchar *, longchar);  static int matchWhich(longchar *, longchar); -  /*    * regexCompile: compile regular expression   */ @@ -42,41 +81,66 @@ regexCompile(char *ex, int igncase)      return msg;  } -Regex * -newRegex(char *ex, int igncase, Regex *regex, char **msg) +static Regex * +newRegex0(char **ex, int igncase, Regex *regex, char **msg, int level)  {      char *p;      longchar *r; -    regexchar *re = regex->re - 1; +    regexchar *re;      int m; +    longchar *st_ptr; -    if (regex == 0) -	regex = (Regex *)GC_malloc_atomic(sizeof(Regex)); +    if (regex == NULL) +	regex = (Regex *)GC_malloc(sizeof(Regex)); +    regex->alt_regex = NULL; +    re = regex->re;      st_ptr = regex->storage; -    for (p = ex; *p != '\0'; p++) { +    for (p = *ex; *p != '\0'; p++) { +	re->mode = 0;  	switch (*p) {  	case '.': +	    re->p.pattern = NULL; +	    RE_SET_MODE(re, RE_ANY);  	    re++; -	    re->pattern = NULL; -	    re->mode = RE_ANY;  	    break;  	case '$': +	    re->p.pattern = NULL; +	    RE_SET_MODE(re, RE_END);  	    re++; -	    re->pattern = NULL; -	    re->mode = RE_END;  	    break;  	case '^': +	    re->p.pattern = NULL; +	    RE_SET_MODE(re, RE_BEGIN);  	    re++; -	    re->pattern = NULL; -	    re->mode = RE_BEGIN;  	    break; -	case '*': -	    if (!(re->mode & RE_ANY) && re->pattern == NULL) { +	case '+': +	    if (re == regex->re || +		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {  		if (msg)  		    *msg = "Invalid regular expression";  		return NULL;  	    } +	    *re = *(re - 1);  	    re->mode |= RE_ANYTIME; +	    re++; +	    break; +	case '*': +	    if (re == regex->re || +		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) { +		if (msg) +		    *msg = "Invalid regular expression"; +		return NULL; +	    } +	    (re - 1)->mode |= RE_ANYTIME; +	    break; +	case '?': +	    if (re == regex->re || +		(RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) { +		if (msg) +		    *msg = "Invalid regular expression"; +		return NULL; +	    } +	    (re - 1)->mode |= RE_OPT;  	    break;  	case '[':  	    r = st_ptr; @@ -110,14 +174,40 @@ newRegex(char *ex, int igncase, Regex *regex, char **msg)  		    *(st_ptr++) = (unsigned char)*(p++);  	    }  	    *(st_ptr++) = '\0'; +	    re->p.pattern = r; +	    RE_SET_MODE(re, m); +	    re++; +	    break; +	case '|': +	    RE_SET_MODE(re, RE_ENDMARK); +	    re++; +	    p++; +	    regex->alt_regex = newRegex0(&p, igncase, NULL, msg, level); +	    if (regex->alt_regex == NULL) +		return NULL; +	    *ex = p; +	    return regex; +	case '(': +	    RE_SET_MODE(re, RE_SUBREGEX); +	    p++; +	    re->p.sub = newRegex0(&p, igncase, NULL, msg, level + 1); +	    if (re->p.sub == NULL) +		return NULL;  	    re++; -	    re->pattern = r; -	    re->mode = m;  	    break; +	case ')': +	    if (level == 0) { +		if (msg) +		    *msg = "Too many ')'"; +		return NULL; +	    } +	    RE_SET_MODE(re, RE_ENDMARK); +	    re++; +	    *ex = p; +	    return regex;  	case '\\':  	    p++;  	default: -	    re++;  #ifdef JP_CHARSET  	    if (IS_KANJI1(*p)) {  		*(st_ptr) = RE_KANJI(p); @@ -126,26 +216,33 @@ newRegex(char *ex, int igncase, Regex *regex, char **msg)  	    else  #endif  		*st_ptr = (unsigned char)*p; -	    re->pattern = st_ptr; +	    re->p.pattern = st_ptr;  	    st_ptr++; -	    re->mode = RE_NORMAL; +	    RE_SET_MODE(re, RE_NORMAL);  	    if (igncase)  		re->mode |= RE_IGNCASE; +	    re++;  	} -	if (st_ptr >= &Cstorage[STORAGE_MAX] || -	    re >= &CompiledRegex[REGEX_MAX]) { +	if (st_ptr >= ®ex->storage[STORAGE_MAX] || +	    re >= ®ex->re[REGEX_MAX]) {  	    if (msg)  		*msg = "Regular expression too long";  	    return NULL;  	}      } -    re++; -    re->mode = RE_ENDMARK; +    RE_SET_MODE(re, RE_ENDMARK);      if (msg)  	*msg = NULL; +    *ex = p;      return regex;  } +Regex * +newRegex(char *ex, int igncase, Regex *regex, char **msg) +{ +    return newRegex0(&ex, igncase, regex, msg, 0); +} +  /*    * regexMatch: match regular expression   */ @@ -159,20 +256,33 @@ int  RegexMatch(Regex *re, char *str, int len, int firstp)  {      char *p, *ep; +    char *lpos; +    Regex *r;      if (str == NULL)  	return 0; +    if (len == 0) +	len = strlen(str);      re->position = NULL; -    ep = str + ((len == 0) ? strlen(str) : len); +    ep = str + len;      for (p = str; p < ep; p++) { -	switch (regmatch -		(re->re, p, ep - p, firstp && (p == str), &re->lposition)) { -	case 1: -	    re->position = p; +	lpos = NULL; +	re->lposition = NULL; +	for (r = re; r != NULL; r = r->alt_regex) { +	    switch (regmatch(r->re, p, firstp && (p == str), &lpos)) { +	    case 1:		/* matched */ +		re->position = p; +		if (re->lposition == NULL || re->lposition < lpos) +		    re->lposition = lpos; +		break; +	    case -1:		/* error */ +		re->position = NULL; +		return -1; +	    } +	} +	if (re->lposition != NULL) { +	    /* matched */  	    return 1; -	case -1: -	    re->position = NULL; -	    return -1;  	}  #ifdef JP_CHARSET  	if (IS_KANJI1(*p)) @@ -202,118 +312,306 @@ matchedPosition(char **first, char **last)  /*    * Intermal routines   */ + +struct MatchingContext1 { +    int label; +    regexchar *re; +    char *lastpos; +    char *str; +    int iter_limit; +    int n_any; +    int firstp; +    char *end_p; +    Regex *sub_regex; +    struct MatchingContext1 *sub_ctx; +    struct MatchingContext2 *ctx2; +}; + +struct MatchingContext2 { +    int label; +    Regex *regex; +    char *lastpos; +    struct MatchingContext1 *ctx; +    struct MatchingContext2 *ctx2; +    char *str; +    int n_any; +    int firstp; +}; + + +#define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum: + +static int regmatch_iter(struct MatchingContext1 *, regexchar *, char *, int); +  static int -regmatch(regexchar * re, char *str, int len, int firstp, char **lastpos) +regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, +		     regexchar * pat2, char *str, int iter_limit, int firstp)  { -    char *p = str, *ep = str + len; -    char *lpos, *llpos = NULL; -    longchar k; +    switch (c->label) { +    case 1: +	goto label1; +    case 2: +	goto label2; +    case 3: +	goto label3; +    } +    c->ctx = GC_malloc(sizeof(struct MatchingContext1)); +    c->ctx2 = GC_malloc(sizeof(struct MatchingContext2)); +    c->ctx->label = 0; +    c->regex = regex; +    c->n_any = 0; +    c->str = str; +    c->firstp = firstp; +    for (;;) { +	c->ctx->label = 0; +	while (regmatch_iter(c->ctx, c->regex->re, c->str, c->firstp)) { +	    c->n_any = c->ctx->lastpos - c->str; +	    if (c->n_any <= 0) +		continue; +	    c->firstp = 0; +	    if (RE_MODE(pat2) == RE_ENDMARK) { +		c->lastpos = c->str + c->n_any; +		YIELD(1, c, 1); +	    } +	    else if (regmatch(pat2, c->str + c->n_any, +			      c->firstp, &c->lastpos) == 1) { +		YIELD(1, c, 2); +	    } +	    if (iter_limit == 1) +		continue; +	    c->ctx2->label = 0; +	    while (regmatch_sub_anytime(c->ctx2, regex, pat2, +					c->str + c->n_any, iter_limit - 1, +					c->firstp)) { -    *lastpos = NULL; -#ifdef REGEX_DEBUG -    debugre(re, str); -#endif				/* REGEX_DEBUG */ -    while ((re->mode & RE_ENDMARK) == 0) { -	if (re->mode & RE_BEGIN) { -	    if (!firstp) -		return 0; -	    re++; +		c->lastpos = c->ctx2->lastpos; +		YIELD(1, c, 3); +	    }  	} -	else if (re->mode & RE_ANYTIME) { -	    short matched, ok = 0; -	    for (;;) { -		matched = 0; -		if (regmatch(re + 1, p, ep - p, firstp, &lpos) == 1) { -		    llpos = lpos; -		    matched = 1; -		    ok = 1; +	if (c->regex->alt_regex == NULL) +	    break; +	c->regex = c->regex->alt_regex; +    } +    return 0; +} + +static int +regmatch_iter(struct MatchingContext1 *c, +	      regexchar * re, char *str, int firstp) +{ +    switch (c->label) { +    case 1: +	goto label1; +    case 2: +	goto label2; +    case 3: +	goto label3; +    case 4: +	goto label4; +    case 5: +	goto label5; +    case 6: +	goto label6; +    case 7: +	goto label7; +    } +    if (RE_MODE(re) == RE_ENDMARK) +	return 0; +    c->re = re; +    c->end_p = str + strlen(str); +    c->firstp = firstp; +    c->str = str; +    c->sub_ctx = NULL; +    while (RE_MODE(c->re) != RE_ENDMARK) { +	if (c->re->mode & (RE_ANYTIME | RE_OPT)) { +	    if (c->re->mode & RE_ANYTIME) +		c->iter_limit = RE_ITER_LIMIT; +	    else +		c->iter_limit = 1; +	    c->n_any = -1; +	    while (c->n_any < c->iter_limit) { +		if (c->str + c->n_any >= c->end_p) { +		    return 0;  		} -		if (p >= ep) -		    break; +		if (c->n_any >= 0) { +		    if (RE_MODE(c->re) == RE_SUBREGEX) { +			c->ctx2 = GC_malloc(sizeof(struct MatchingContext2)); +			c->ctx2->label = 0; +			while (regmatch_sub_anytime(c->ctx2, +						    c->re->p.sub, +						    c->re + 1, +						    c->str + c->n_any, +						    c->iter_limit, +						    c->firstp)) { +			    c->n_any = c->ctx2->lastpos - c->str; +			    c->lastpos = c->ctx2->lastpos; +			    YIELD(1, c, 1); +			} +			return 0; +		    }  #ifdef JP_CHARSET -		if (IS_KANJI1(*p)) { -		    k = RE_KANJI(p); -		    if (regmatch1(re, k)) { -			if (lastpos != NULL) -			    *lastpos = llpos; -			p += 2; +		    else if (IS_KANJI1(c->str[c->n_any])) { +			longchar k; +			k = RE_KANJI(c->str + c->n_any); +			if (regmatch1(c->re, k)) { +			    c->n_any += 2; +			} +			else { +			    return 0; +			} +			c->firstp = 0;  		    } -		    else -			break; -		} -		else  #endif -		{ -		    k = (unsigned char)*p; -		    if (regmatch1(re, k)) { -			p++; -			if (lastpos != NULL) -			    *lastpos = llpos; +		    else { +			longchar k; +			k = (unsigned char)c->str[c->n_any]; +			if (regmatch1(c->re, k)) { +			    c->n_any++; +			} +			else { +			    return 0; +			} +			c->firstp = 0;  		    } -		    else -			break; +		} +		else +		    c->n_any++; +		if (RE_MODE(c->re + 1) == RE_ENDMARK) { +		    c->lastpos = c->str + c->n_any; +		    YIELD(1, c, 2); +		} +		else if (regmatch(c->re + 1, c->str + c->n_any, +				  c->firstp, &c->lastpos) == 1) { +		    YIELD(1, c, 3);  		}  	    } -	    if (lastpos != NULL) -		*lastpos = llpos; -	    return ok; -	} -	else if (re->mode & RE_END) { -	    if (lastpos != NULL) -		*lastpos = p; -	    return (p >= ep); +	    return 0;  	} -	else { -	    int a; +	/* regexp other than pat*, pat+ and pat? */ +	if (c->str >= c->end_p) +	    return 0; +	switch (RE_MODE(c->re)) { +	case RE_BEGIN: +	    if (!c->firstp) +		return 0; +	    c->re++; +	    break; +	case RE_END: +	    c->lastpos = c->str; +	    c->re++; +	    YIELD((c->str >= c->end_p), c, 4); +	    break; +	case RE_SUBREGEX: +	    if (c->sub_ctx == NULL) { +		c->sub_ctx = GC_malloc(sizeof(struct MatchingContext1)); +	    } +	    c->sub_regex = c->re->p.sub; +	    for (;;) { +		c->sub_ctx->label = 0; +		while (regmatch_iter(c->sub_ctx, c->sub_regex->re, +				     c->str, c->firstp)) { +		    if (c->sub_ctx->lastpos != c->str) +			c->firstp = 0; +		    if (RE_MODE(c->re + 1) == RE_ENDMARK) { +			c->lastpos = c->sub_ctx->lastpos; +			YIELD(1, c, 5); +		    } +		    else if (regmatch(c->re + 1, c->sub_ctx->lastpos, +				      c->firstp, &c->lastpos) == 1) { +			YIELD(1, c, 6); +		    } +		} +		if (c->sub_regex->alt_regex == NULL) +		    break; +		c->sub_regex = c->sub_regex->alt_regex; +	    } +	    return 0; +	default:  #ifdef JP_CHARSET -	    if (IS_KANJI1(*p)) { -		k = RE_KANJI(p); -		p += 2; -		a = regmatch1(re, k); +	    if (IS_KANJI1(*c->str)) { +		longchar k; +		k = RE_KANJI(c->str); +		c->str += 2; +		if (!regmatch1(c->re, k)) +		    return 0;  	    }  	    else  #endif  	    { -		k = (unsigned char)*(p++); -		a = regmatch1(re, k); +		longchar k; +		k = (unsigned char)*(c->str++); +		if (!regmatch1(c->re, k)) +		    return 0;  	    } -	    if (!a) -		return 0; -	    else -		re++; +	    c->re++; +	    c->firstp = 0; +	} +    } +    c->lastpos = c->str; +#ifdef REGEX_DEBUG +    if (verbose) +	printf("Succeed: %s %d\n", c->str, c->lastpos - c->str); +#endif +    YIELD(1, c, 7); +    return 0; +} + +static int +regmatch(regexchar * re, char *str, int firstp, char **lastpos) +{ +    struct MatchingContext1 contx; + +    *lastpos = NULL; + +    contx.label = 0; +    while (regmatch_iter(&contx, re, str, firstp)) { +#ifdef REGEX_DEBUG +	char *p; +	if (verbose) { +	    printf("regmatch: matched <"); +	    for (p = str; p < contx.lastpos; p++) +		putchar(*p); +	    printf(">\n");  	} +#endif +	if (*lastpos == NULL || *lastpos < contx.lastpos) +	    *lastpos = contx.lastpos;      } -    if (lastpos != NULL) -	*lastpos = p; +    if (*lastpos == NULL) +	return 0;      return 1;  } +  static int  regmatch1(regexchar * re, longchar c)  { -    switch (re->mode & RE_MATCHMODE) { +    switch (RE_MODE(re)) {      case RE_ANY:  #ifdef REGEX_DEBUG -	printf("%c vs any. -> 1\n", c); +	if (verbose) +	    printf("%c vs any. -> 1\n", c);  #endif				/* REGEX_DEBUG */  	return 1;      case RE_NORMAL:  #ifdef REGEX_DEBUG -	printf("RE=%c vs %c -> %d\n", *re->pattern, c, *re->pattern == c); +	if (verbose) +	    printf("RE=%c vs %c -> %d\n", *re->p.pattern, c, +		   *re->p.pattern == c);  #endif				/* REGEX_DEBUG */  	if (re->mode & RE_IGNCASE) { -	    if (*re->pattern < 127 && c < 127 && -		IS_ALPHA(*re->pattern) && IS_ALPHA(c)) -		return tolower(*re->pattern) == tolower(c); +	    if (*re->p.pattern < 127 && c < 127 && +		IS_ALPHA(*re->p.pattern) && IS_ALPHA(c)) +		return tolower(*re->p.pattern) == tolower(c);  	    else -		return *re->pattern == c; +		return *re->p.pattern == c;  	}  	else -	    return (*re->pattern == c); +	    return (*re->p.pattern == c);      case RE_WHICH: -	return matchWhich(re->pattern, c); +	return matchWhich(re->p.pattern, c);      case RE_EXCEPT: -	return !matchWhich(re->pattern, c); +	return !matchWhich(re->p.pattern, c);      }      return 0;  } @@ -325,7 +623,8 @@ matchWhich(longchar * pattern, longchar c)      int ans = 0;  #ifdef REGEX_DEBUG -    printf("RE pattern = %s char=%c", pattern, c); +    if (verbose) +	printf("RE pattern = %s char=%s", lc2c(pattern, 10000), lc2c(&c, 1));  #endif				/* REGEX_DEBUG */      while (*p != '\0') {  	if (*(p + 1) == RE_WHICH_RANGE && *(p + 2) != '\0') {	/* Char class. */ @@ -344,64 +643,130 @@ matchWhich(longchar * pattern, longchar c)  	}      }  #ifdef REGEX_DEBUG -    printf(" -> %d\n", ans); +    if (verbose) +	printf(" -> %d\n", ans);  #endif				/* REGEX_DEBUG */      return ans;  }  #ifdef REGEX_DEBUG  char * -lc2c(longchar * x) +lc2c(longchar * x, int len)  {      static char y[100];      int i = 0; +    char *r; -    while (x[i]) { +    while (x[i] && i < len) {  	if (x[i] == RE_WHICH_RANGE) -	    y[i] = '-'; +	    y[i++] = '-'; +	else if (x[i] >= 128) { +	    y[i++] = ((x[i] >> 8) & 0xff); +	    y[i++] = (x[i] & 0xff); +	}  	else -	    y[i] = x[i]; -	i++; +	    y[i++] = x[i];      }      y[i] = '\0'; -    return y; +    r = GC_malloc_atomic(i + 1); +    strcpy(r, y); +    return r;  }  void -debugre(re, s) -     regexchar *re; -     char *s; +debugre(regexchar * re)  { -    for (; !(re->mode & RE_ENDMARK); re++) { -	if (re->mode & RE_BEGIN) { +    for (; RE_MODE(re) != RE_ENDMARK; re++) { +	switch (RE_MODE(re)) { +	case RE_BEGIN:  	    printf("Begin ");  	    continue; -	} -	else if (re->mode & RE_END) { +	case RE_END:  	    printf("End ");  	    continue;  	}  	if (re->mode & RE_ANYTIME)  	    printf("Anytime-"); +	if (re->mode & RE_OPT) +	    printf("Opt-"); -	switch (re->mode & RE_MATCHMODE) { +	switch (RE_MODE(re)) {  	case RE_ANY:  	    printf("Any ");  	    break;  	case RE_NORMAL: -	    printf("Match-to'%c' ", *re->pattern); +	    printf("Match-to'%c' ", *re->p.pattern);  	    break;  	case RE_WHICH: -	    printf("One-of\"%s\" ", lc2c(re->pattern)); +	    printf("One-of\"%s\" ", lc2c(re->p.pattern, 10000));  	    break;  	case RE_EXCEPT: -	    printf("Other-than\"%s\" ", lc2c(re->pattern)); +	    printf("Other-than\"%s\" ", lc2c(re->p.pattern, 10000));  	    break; +	case RE_SUBREGEX: +	    { +		Regex *r = re->p.sub; +		printf("("); +		while (r) { +		    debugre(r->re); +		    if (r->alt_regex) +			printf(" | "); +		    r = r->alt_regex; +		} +		printf(")"); +		break; +	    }  	default:  	    printf("Unknown ");  	}      } -    putchar('\n');  }  #endif				/* REGEX_DEBUG */ + +#ifdef REGEXTEST +int +main(int argc, char **argv) +{ +    char buf[128], buf2[128]; +    char *msg; +    Regex *re; +    char *fpos, *epos; +    FILE *f = stdin; +    int i = 1; + +#ifdef REGEX_DEBUG +    for (i = 1; i < argc; i++) { +	if (strcmp(argv[i], "-v") == 0) +	    verbose = 1; +	else +	    break; +    } +#endif + +    if (argc > i) +	f = fopen(argv[i], "r"); +    if (f == NULL) { +	fprintf(stderr, "Can't open %s\n", argv[i]); +	exit(1); +    } +    while (fscanf(f, "%s%s", buf, buf2) == 2) { +	re = newRegex(buf, 0, NULL, &msg); +	if (re == NULL) { +	    printf("Error on regexp /%s/: %s\n", buf, msg); +	    exit(1); +	} +	if (RegexMatch(re, buf2, 0, 1)) { +	    printf("/%s/\t%s\t", buf, buf2); +	    MatchedPosition(re, &fpos, &epos); +	    while (fpos < epos) +		putchar(*(fpos++)); +	} +	else +	    printf("/%s/\t%s\tno_match", buf, buf2); +	putchar('\n'); +    } +    /* notreatched */ +    return 0; +} +#endif @@ -1,44 +1,25 @@ -/* $Id: regex.h,v 1.3 2001/11/24 02:01:26 ukai Exp $ */ +/* $Id: regex.h,v 1.4 2002/01/10 04:55:07 ukai Exp $ */  #define REGEX_MAX	64  #define STORAGE_MAX	256 -#ifndef NULL -#define NULL	0 -#endif				/* not NULL */ - -#define	RE_NORMAL	0 -#define RE_MATCHMODE	0x07 -#define RE_ANY		0x01 -#define RE_WHICH	0x02 -#define RE_EXCEPT	0x04 -#define RE_ANYTIME	0x08 -#define RE_BEGIN	0x10 -#define RE_END		0x20 -#define RE_IGNCASE      0x40 -#define RE_ENDMARK	0x80  typedef unsigned short longchar; - -typedef struct { - -    longchar *pattern; - +typedef struct regexchar { +    union { +	longchar *pattern; +	struct regex *sub; +    } p;      unsigned char mode; -  } regexchar; -typedef struct { - +typedef struct regex {      regexchar re[REGEX_MAX]; -      longchar storage[STORAGE_MAX]; -      char *position; -      char *lposition; - +    struct regex *alt_regex;  } Regex; | 
