aboutsummaryrefslogtreecommitdiffstats
path: root/regex.c
diff options
context:
space:
mode:
authorTatsuya Kinoshita <tats@vega.ocn.ne.jp>2011-05-04 07:05:14 +0000
committerTatsuya Kinoshita <tats@vega.ocn.ne.jp>2011-05-04 07:05:14 +0000
commit72f72d64a422d6628c4796f5c0bf2e508f134214 (patch)
tree0c9ea90cc53310832c977265521fb44db24a515e /regex.c
parentAdding upstream version 0.3 (diff)
downloadw3m-1ef51c20e0ea7b35c8c9cad6f9d9bc1e16664cc5.tar.gz
w3m-1ef51c20e0ea7b35c8c9cad6f9d9bc1e16664cc5.zip
Adding upstream version 0.5.1upstream/0.5.1
Diffstat (limited to 'regex.c')
-rw-r--r--regex.c842
1 files changed, 842 insertions, 0 deletions
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..09166fc
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,842 @@
+/* $Id: regex.c,v 1.22 2003/09/24 18:49:00 ukai Exp $ */
+/*
+ * regex: Regular expression pattern match library
+ *
+ * by A.ITO, December 1989
+ * Revised by A.ITO, January 2002
+ */
+
+#ifdef REGEX_DEBUG
+#include <sys/types.h>
+#include <malloc.h>
+#endif /* REGEX_DEBUG */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <gc.h>
+#include "config.h"
+#ifdef USE_M17N
+#include "wc.h"
+#include "wtf.h"
+#ifdef USE_UNICODE
+#include "ucs.h"
+#endif
+#endif
+#include "regex.h"
+#include "config.h"
+#include "myctype.h"
+
+#ifndef NULL
+#define NULL 0
+#endif /* not NULL */
+
+#define RE_ITER_LIMIT 65535
+
+#define RE_MATCHMODE 0x07
+#define RE_NORMAL 0x00
+#define RE_ANY 0x01
+#define RE_WHICH 0x02
+#define RE_EXCEPT 0x03
+#define RE_SUBREGEX 0x04
+#define RE_BEGIN 0x05
+#define RE_END 0x06
+#define RE_ENDMARK 0x07
+
+#define RE_OPT 0x08
+#define RE_ANYTIME 0x10
+#define RE_IGNCASE 0x40
+
+#define RE_MODE(x) ((x)->mode&RE_MATCHMODE)
+#define RE_SET_MODE(x,v) ((x)->mode = (((x)->mode&~RE_MATCHMODE)|((v)&RE_MATCHMODE)))
+
+#ifdef REGEX_DEBUG
+void debugre(regexchar *);
+char *lc2c(longchar *, int);
+int verbose;
+#endif /* REGEX_DEBUG */
+
+#ifdef USE_M17N
+#define get_mclen(c) wtf_len1((wc_uchar *)(c))
+#else
+#define get_mclen(c) 1
+#endif
+
+#ifndef TOLOWER
+#include <ctype.h>
+#define TOLOWER(x) tolower(x)
+#define TOUPPER(x) toupper(x)
+#endif
+
+#define RE_TYPE_END 0
+#define RE_TYPE_CHAR 1
+#define RE_TYPE_WCHAR_T 2
+#define RE_WHICH_RANGE 3
+#define RE_TYPE_SYMBOL 4
+
+static longchar
+set_longchar(char *str)
+{
+ unsigned char *p = (unsigned char *)str;
+ longchar r;
+
+#ifdef USE_M17N
+ if (*p & 0x80) {
+ r.wch = wtf_parse1(&p);
+ if (r.wch.ccs == WC_CCS_SPECIAL || r.wch.ccs == WC_CCS_SPECIAL_W) {
+ r.type = RE_TYPE_SYMBOL;
+ return r;
+ }
+#ifdef USE_UNICODE
+ if (WC_CCS_IS_UNICODE(r.wch.ccs)) {
+ if (WC_CCS_SET(r.wch.ccs) == WC_CCS_UCS_TAG)
+ r.wch.code = wc_ucs_tag_to_ucs(r.wch.code);
+ r.wch.ccs = WC_CCS_UCS4;
+ }
+ else
+#endif
+ r.wch.ccs = WC_CCS_SET(r.wch.ccs);
+ r.type = RE_TYPE_WCHAR_T;
+ return r;
+ }
+#endif
+ r.ch = *p;
+ r.type = RE_TYPE_CHAR;
+ return r;
+}
+
+static Regex DefaultRegex;
+#define CompiledRegex DefaultRegex.re
+#define Cstorage DefaultRegex.storage
+
+static int regmatch(regexchar *, char *, char *, int, char **);
+static int regmatch1(regexchar *, longchar *);
+static int matchWhich(longchar *, longchar *, int);
+static int match_longchar(longchar *, longchar *, int);
+static int match_range_longchar(longchar *, longchar *, longchar *, int);
+
+/*
+ * regexCompile: compile regular expression
+ */
+char *
+regexCompile(char *ex, int igncase)
+{
+ char *msg;
+ newRegex(ex, igncase, &DefaultRegex, &msg);
+ return msg;
+}
+
+static Regex *
+newRegex0(char **ex, int igncase, Regex *regex, char **msg, int level)
+{
+ char *p;
+ longchar *r;
+ regexchar *re;
+ int m;
+ longchar *st_ptr;
+
+ if (regex == NULL)
+ regex = (Regex *)GC_malloc(sizeof(Regex));
+ regex->alt_regex = NULL;
+ re = regex->re;
+ st_ptr = regex->storage;
+ for (p = *ex; *p != '\0'; p++) {
+ re->mode = 0;
+ switch (*p) {
+ case '.':
+ re->p.pattern = NULL;
+ RE_SET_MODE(re, RE_ANY);
+ re++;
+ break;
+ case '$':
+ re->p.pattern = NULL;
+ RE_SET_MODE(re, RE_END);
+ re++;
+ break;
+ case '^':
+ re->p.pattern = NULL;
+ RE_SET_MODE(re, RE_BEGIN);
+ re++;
+ break;
+ case '+':
+ if (re == regex->re ||
+ (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
+ if (msg)
+ *msg = "Invalid regular expression";
+ return NULL;
+ }
+ *re = *(re - 1);
+ re->mode |= RE_ANYTIME;
+ re++;
+ break;
+ case '*':
+ if (re == regex->re ||
+ (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
+ if (msg)
+ *msg = "Invalid regular expression";
+ return NULL;
+ }
+ (re - 1)->mode |= RE_ANYTIME;
+ break;
+ case '?':
+ if (re == regex->re ||
+ (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) {
+ if (msg)
+ *msg = "Invalid regular expression";
+ return NULL;
+ }
+ (re - 1)->mode |= RE_OPT;
+ break;
+ case '[':
+ r = st_ptr;
+ if (*++p == '^') {
+ p++;
+ m = RE_EXCEPT;
+ }
+ else
+ m = RE_WHICH;
+ if (*p == '-' || *p == ']')
+ *(st_ptr++) = set_longchar(p);
+ while (*p != ']') {
+ if (*p == '\\') {
+ p++;
+ *(st_ptr++) = set_longchar(p);
+ p += get_mclen(p);
+ }
+ else if (*p == '-' && *(p + 1) != ']') {
+ (st_ptr++)->type = RE_WHICH_RANGE;
+ p++;
+ }
+ else if (*p == '\0') {
+ if (msg)
+ *msg = "Missing ]";
+ return NULL;
+ }
+ else {
+ *(st_ptr++) = set_longchar(p);
+ p += get_mclen(p);
+ }
+ if (st_ptr >= &regex->storage[STORAGE_MAX]) {
+ if (msg)
+ *msg = "Regular expression too long";
+ return NULL;
+ }
+ }
+ (st_ptr++)->type = RE_TYPE_END;
+ re->p.pattern = r;
+ RE_SET_MODE(re, m);
+ if (igncase)
+ re->mode |= RE_IGNCASE;
+ re++;
+ break;
+ case '|':
+ RE_SET_MODE(re, RE_ENDMARK);
+ re++;
+ p++;
+ regex->alt_regex = newRegex0(&p, igncase, NULL, msg, level);
+ if (regex->alt_regex == NULL)
+ return NULL;
+ *ex = p;
+ return regex;
+ case '(':
+ RE_SET_MODE(re, RE_SUBREGEX);
+ p++;
+ re->p.sub = newRegex0(&p, igncase, NULL, msg, level + 1);
+ if (re->p.sub == NULL)
+ return NULL;
+ re++;
+ break;
+ case ')':
+ if (level == 0) {
+ if (msg)
+ *msg = "Too many ')'";
+ return NULL;
+ }
+ RE_SET_MODE(re, RE_ENDMARK);
+ re++;
+ *ex = p;
+ return regex;
+ case '\\':
+ p++;
+ default:
+ *(st_ptr) = set_longchar(p);
+ p += get_mclen(p) - 1;
+ re->p.pattern = st_ptr;
+ st_ptr++;
+ RE_SET_MODE(re, RE_NORMAL);
+ if (igncase)
+ re->mode |= RE_IGNCASE;
+ re++;
+ }
+ if (st_ptr >= &regex->storage[STORAGE_MAX] ||
+ re >= &regex->re[REGEX_MAX]) {
+ if (msg)
+ *msg = "Regular expression too long";
+ return NULL;
+ }
+ }
+ RE_SET_MODE(re, RE_ENDMARK);
+ if (msg)
+ *msg = NULL;
+ *ex = p;
+ return regex;
+}
+
+Regex *
+newRegex(char *ex, int igncase, Regex *regex, char **msg)
+{
+ return newRegex0(&ex, igncase, regex, msg, 0);
+}
+
+/*
+ * regexMatch: match regular expression
+ */
+int
+regexMatch(char *str, int len, int firstp)
+{
+ return RegexMatch(&DefaultRegex, str, len, firstp);
+}
+
+int
+RegexMatch(Regex *re, char *str, int len, int firstp)
+{
+ char *p, *ep;
+ char *lpos;
+ Regex *r;
+
+ if (str == NULL)
+ return 0;
+ if (len < 0)
+ len = strlen(str);
+ re->position = NULL;
+ ep = str + len;
+ for (p = str; p <= ep; p++) {
+ lpos = NULL;
+ re->lposition = NULL;
+ for (r = re; r != NULL; r = r->alt_regex) {
+ switch (regmatch(r->re, p, ep, firstp && (p == str), &lpos)) {
+ case 1: /* matched */
+ re->position = p;
+ if (re->lposition == NULL || re->lposition < lpos)
+ re->lposition = lpos;
+ break;
+ case -1: /* error */
+ re->position = NULL;
+ return -1;
+ }
+ }
+ if (re->lposition != NULL) {
+ /* matched */
+ return 1;
+ }
+ p += get_mclen(p) - 1;
+ }
+ return 0;
+}
+
+/*
+ * matchedPosition: last matched position
+ */
+void
+MatchedPosition(Regex *re, char **first, char **last)
+{
+ *first = re->position;
+ *last = re->lposition;
+}
+
+void
+matchedPosition(char **first, char **last)
+{
+ *first = DefaultRegex.position;
+ *last = DefaultRegex.lposition;
+}
+
+/*
+ * Intermal routines
+ */
+
+struct MatchingContext1 {
+ int label;
+ regexchar *re;
+ char *lastpos;
+ char *str;
+ int iter_limit;
+ int n_any;
+ int firstp;
+ char *end_p;
+ Regex *sub_regex;
+ struct MatchingContext1 *sub_ctx;
+ struct MatchingContext2 *ctx2;
+};
+
+struct MatchingContext2 {
+ int label;
+ Regex *regex;
+ char *lastpos;
+ struct MatchingContext1 *ctx;
+ struct MatchingContext2 *ctx2;
+ char *str;
+ int n_any;
+ int firstp;
+};
+
+
+#define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum:
+
+static int regmatch_iter(struct MatchingContext1 *,
+ regexchar *, char *, char *, int);
+
+static int
+regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
+ regexchar * pat2,
+ char *str, char *end_p, int iter_limit, int firstp)
+{
+ switch (c->label) {
+ case 1:
+ goto label1;
+ case 2:
+ goto label2;
+ case 3:
+ goto label3;
+ }
+ c->ctx = GC_malloc(sizeof(struct MatchingContext1));
+ c->ctx2 = GC_malloc(sizeof(struct MatchingContext2));
+ c->ctx->label = 0;
+ c->regex = regex;
+ c->n_any = 0;
+ c->str = str;
+ c->firstp = firstp;
+ for (;;) {
+ c->ctx->label = 0;
+ while (regmatch_iter(c->ctx, c->regex->re, c->str, end_p, c->firstp)) {
+ c->n_any = c->ctx->lastpos - c->str;
+ if (c->n_any <= 0)
+ continue;
+ c->firstp = 0;
+ if (RE_MODE(pat2) == RE_ENDMARK) {
+ c->lastpos = c->str + c->n_any;
+ YIELD(1, c, 1);
+ }
+ else if (regmatch(pat2, c->str + c->n_any, end_p,
+ c->firstp, &c->lastpos) == 1) {
+ YIELD(1, c, 2);
+ }
+ if (iter_limit == 1)
+ continue;
+ c->ctx2->label = 0;
+ while (regmatch_sub_anytime(c->ctx2, regex, pat2,
+ c->str + c->n_any, end_p,
+ iter_limit - 1, c->firstp)) {
+
+ c->lastpos = c->ctx2->lastpos;
+ YIELD(1, c, 3);
+ }
+ }
+ if (c->regex->alt_regex == NULL)
+ break;
+ c->regex = c->regex->alt_regex;
+ }
+ return 0;
+}
+
+static int
+regmatch_iter(struct MatchingContext1 *c,
+ regexchar * re, char *str, char *end_p, int firstp)
+{
+ switch (c->label) {
+ case 1:
+ goto label1;
+ case 2:
+ goto label2;
+ case 3:
+ goto label3;
+ case 4:
+ goto label4;
+ case 5:
+ goto label5;
+ case 6:
+ goto label6;
+ case 7:
+ goto label7;
+ }
+ if (RE_MODE(re) == RE_ENDMARK)
+ return 0;
+ c->re = re;
+ c->firstp = firstp;
+ c->str = str;
+ c->end_p = end_p;
+ c->sub_ctx = NULL;
+ c->lastpos = NULL;
+ while (RE_MODE(c->re) != RE_ENDMARK) {
+ if (c->re->mode & (RE_ANYTIME | RE_OPT)) {
+ if (c->re->mode & RE_ANYTIME)
+ c->iter_limit = RE_ITER_LIMIT;
+ else
+ c->iter_limit = 1;
+ c->n_any = -1;
+ while (c->n_any < c->iter_limit) {
+ if (c->str + c->n_any >= c->end_p) {
+ return 0;
+ }
+ if (c->n_any >= 0) {
+ if (RE_MODE(c->re) == RE_SUBREGEX) {
+ c->ctx2 = GC_malloc(sizeof(struct MatchingContext2));
+ c->ctx2->label = 0;
+ while (regmatch_sub_anytime(c->ctx2,
+ c->re->p.sub,
+ c->re + 1,
+ c->str + c->n_any,
+ c->end_p,
+ c->iter_limit,
+ c->firstp)) {
+ c->n_any = c->ctx2->lastpos - c->str;
+ c->lastpos = c->ctx2->lastpos;
+ YIELD(1, c, 1);
+ }
+ return 0;
+ }
+ else {
+ longchar k;
+ k = set_longchar(c->str + c->n_any);
+ if (regmatch1(c->re, &k)) {
+ c->n_any += get_mclen(c->str + c->n_any);
+ }
+ else {
+ return 0;
+ }
+ c->firstp = 0;
+ }
+ }
+ else
+ c->n_any++;
+ if (RE_MODE(c->re + 1) == RE_ENDMARK) {
+ c->lastpos = c->str + c->n_any;
+ YIELD(1, c, 2);
+ }
+ else if (regmatch(c->re + 1, c->str + c->n_any, c->end_p,
+ c->firstp, &c->lastpos) == 1) {
+ YIELD(1, c, 3);
+ }
+ }
+ return 0;
+ }
+ /* regexp other than pat*, pat+ and pat? */
+ switch (RE_MODE(c->re)) {
+ case RE_BEGIN:
+ if (!c->firstp)
+ return 0;
+ c->re++;
+ break;
+ case RE_END:
+ if (c->str >= c->end_p) {
+ c->lastpos = c->str;
+ c->re++;
+ YIELD(1, c, 4);
+ }
+ else {
+ c->lastpos = NULL;
+ return 0;
+ }
+ break;
+ case RE_SUBREGEX:
+ if (c->sub_ctx == NULL) {
+ c->sub_ctx = GC_malloc(sizeof(struct MatchingContext1));
+ }
+ c->sub_regex = c->re->p.sub;
+ for (;;) {
+ c->sub_ctx->label = 0;
+ while (regmatch_iter(c->sub_ctx, c->sub_regex->re,
+ c->str, c->end_p, c->firstp)) {
+ if (c->sub_ctx->lastpos != c->str)
+ c->firstp = 0;
+ if (RE_MODE(c->re + 1) == RE_ENDMARK) {
+ c->lastpos = c->sub_ctx->lastpos;
+ YIELD(1, c, 5);
+ }
+ else if (regmatch(c->re + 1, c->sub_ctx->lastpos, c->end_p,
+ c->firstp, &c->lastpos) == 1) {
+ YIELD(1, c, 6);
+ }
+ }
+ if (c->sub_regex->alt_regex == NULL)
+ break;
+ c->sub_regex = c->sub_regex->alt_regex;
+ }
+ return 0;
+ default:
+ {
+ longchar k;
+ k = set_longchar(c->str);
+ c->str += get_mclen(c->str);
+ if (!regmatch1(c->re, &k))
+ return 0;
+ }
+ c->re++;
+ c->firstp = 0;
+ }
+ if (c->str > c->end_p) {
+ return 0;
+ }
+ }
+ c->lastpos = c->str;
+#ifdef REGEX_DEBUG
+ if (verbose)
+ printf("Succeed: %s %d\n", c->str, c->lastpos - c->str);
+#endif
+ YIELD(1, c, 7);
+ return 0;
+}
+
+static int
+regmatch(regexchar * re, char *str, char *end_p, int firstp, char **lastpos)
+{
+ struct MatchingContext1 contx;
+
+ *lastpos = NULL;
+
+ contx.label = 0;
+ while (regmatch_iter(&contx, re, str, end_p, firstp)) {
+#ifdef REGEX_DEBUG
+ char *p;
+ if (verbose) {
+ printf("regmatch: matched <");
+ for (p = str; p < contx.lastpos; p++)
+ putchar(*p);
+ printf(">\n");
+ }
+#endif
+ if (*lastpos == NULL || *lastpos < contx.lastpos)
+ *lastpos = contx.lastpos;
+ }
+ if (*lastpos == NULL)
+ return 0;
+ return 1;
+}
+
+
+static int
+regmatch1(regexchar * re, longchar * c)
+{
+ int ans;
+
+#ifdef USE_M17N
+ if (c->type == RE_TYPE_SYMBOL)
+ return 0;
+#endif
+ switch (RE_MODE(re)) {
+ case RE_ANY:
+#ifdef REGEX_DEBUG
+ if (verbose)
+ printf("%s vs any. -> 1\n", lc2c(c, 1));
+#endif /* REGEX_DEBUG */
+ return 1;
+ case RE_NORMAL:
+ ans = match_longchar(re->p.pattern, c, re->mode & RE_IGNCASE);
+#ifdef REGEX_DEBUG
+ if (verbose)
+ printf("RE=%s vs %s -> %d\n", lc2c(re->p.pattern, 1), lc2c(c, 1),
+ ans);
+#endif /* REGEX_DEBUG */
+ return ans;
+ case RE_WHICH:
+ return matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE);
+ case RE_EXCEPT:
+ return !matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE);
+ }
+ return 0;
+}
+
+static int
+matchWhich(longchar * pattern, longchar * c, int igncase)
+{
+ longchar *p = pattern;
+ int ans = 0;
+
+#ifdef REGEX_DEBUG
+ if (verbose)
+ printf("RE pattern = %s char=%s", lc2c(pattern, 10000), lc2c(c, 1));
+#endif /* REGEX_DEBUG */
+ while (p->type != RE_TYPE_END) {
+ if ((p + 1)->type == RE_WHICH_RANGE && (p + 2)->type != RE_TYPE_END) {
+ if (match_range_longchar(p, p + 2, c, igncase)) {
+ ans = 1;
+ break;
+ }
+ p += 3;
+ }
+ else {
+ if (match_longchar(p, c, igncase)) {
+ ans = 1;
+ break;
+ }
+ p++;
+ }
+ }
+#ifdef REGEX_DEBUG
+ if (verbose)
+ printf(" -> %d\n", ans);
+#endif /* REGEX_DEBUG */
+ return ans;
+}
+
+static int
+match_longchar(longchar * a, longchar * b, int ignore)
+{
+#ifdef USE_M17N
+ if (a->type != b->type)
+ return 0;
+ if (a->type == RE_TYPE_WCHAR_T)
+ return (a->wch.ccs == b->wch.ccs) && (a->wch.code == b->wch.code);
+#endif
+ if (ignore && IS_ALPHA(b->ch))
+ return (a->ch == TOLOWER(b->ch) || a->ch == TOUPPER(b->ch));
+ else
+ return a->ch == b->ch;
+}
+
+static int
+match_range_longchar(longchar * a, longchar * b, longchar * c, int ignore)
+{
+#ifdef USE_M17N
+ if (a->type != b->type || a->type != c->type)
+ return 0;
+ if (a->type == RE_TYPE_WCHAR_T)
+ return ((a->wch.ccs == c->wch.ccs && c->wch.ccs == b->wch.ccs) &&
+ (a->wch.code <= c->wch.code && c->wch.code <= b->wch.code));
+#endif
+ if (ignore && IS_ALPHA(c->ch))
+ return ((a->ch <= TOLOWER(c->ch) && TOLOWER(c->ch) <= b->ch) ||
+ (a->ch <= TOUPPER(c->ch) && TOUPPER(c->ch) <= b->ch));
+ else
+ return (a->ch <= c->ch && c->ch <= b->ch);
+}
+
+#ifdef REGEX_DEBUG
+char *
+lc2c(longchar * x, int len)
+{
+ static char y[100];
+ int i = 0, j = 0;
+ char *r;
+
+ while (x[j].type != RE_TYPE_END && j < len) {
+ if (x[j].type == RE_WHICH_RANGE)
+ y[i++] = '-';
+#ifdef USE_M17N
+ else if (x[j].type == RE_TYPE_WCHAR_T) {
+ char buf[20];
+ sprintf(buf, "[%x-%x]", x[j].wch.ccs, x[j].wch.code);
+ strcpy(&y[i], buf);
+ i += strlen(buf);
+ }
+#endif
+ else
+ y[i++] = x[j].ch;
+ j++;
+ }
+ y[i] = '\0';
+ r = GC_malloc_atomic(i + 1);
+ strcpy(r, y);
+ return r;
+}
+
+void
+debugre(regexchar * re)
+{
+ for (; RE_MODE(re) != RE_ENDMARK; re++) {
+ switch (RE_MODE(re)) {
+ case RE_BEGIN:
+ printf("Begin ");
+ continue;
+ case RE_END:
+ printf("End ");
+ continue;
+ }
+ if (re->mode & RE_ANYTIME)
+ printf("Anytime-");
+ if (re->mode & RE_OPT)
+ printf("Opt-");
+
+ switch (RE_MODE(re)) {
+ case RE_ANY:
+ printf("Any ");
+ break;
+ case RE_NORMAL:
+ printf("Match-to'%c' ", *re->p.pattern);
+ break;
+ case RE_WHICH:
+ printf("One-of\"%s\" ", lc2c(re->p.pattern, 10000));
+ break;
+ case RE_EXCEPT:
+ printf("Other-than\"%s\" ", lc2c(re->p.pattern, 10000));
+ break;
+ case RE_SUBREGEX:
+ {
+ Regex *r = re->p.sub;
+ printf("(");
+ while (r) {
+ debugre(r->re);
+ if (r->alt_regex)
+ printf(" | ");
+ r = r->alt_regex;
+ }
+ printf(")");
+ break;
+ }
+ default:
+ printf("Unknown ");
+ }
+ }
+}
+
+#endif /* REGEX_DEBUG */
+
+#ifdef REGEXTEST
+int
+main(int argc, char **argv)
+{
+ char buf[128], buf2[128];
+ char *msg;
+ Regex *re;
+ char *fpos, *epos;
+ FILE *f = stdin;
+ int i = 1;
+
+#ifdef USE_M17N
+ wtf_init(WC_CES_EUC_JP, WC_CES_EUC_JP);
+#endif
+#ifdef REGEX_DEBUG
+ for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "-v") == 0)
+ verbose = 1;
+ else
+ break;
+ }
+#endif
+
+ if (argc > i)
+ f = fopen(argv[i], "r");
+ if (f == NULL) {
+ fprintf(stderr, "Can't open %s\n", argv[i]);
+ exit(1);
+ }
+ while (fscanf(f, "%s%s", buf, buf2) == 2) {
+ re = newRegex(buf, 0, NULL, &msg);
+ if (re == NULL) {
+ printf("Error on regexp /%s/: %s\n", buf, msg);
+ exit(1);
+ }
+ if (RegexMatch(re, buf2, -1, 1)) {
+ printf("/%s/\t\"%s\"\t\"", buf, buf2);
+ MatchedPosition(re, &fpos, &epos);
+ while (fpos < epos)
+ putchar(*(fpos++));
+ putchar('"');
+ }
+ else
+ printf("/%s/\t\"%s\"\tno_match", buf, buf2);
+ putchar('\n');
+ }
+ /* notreatched */
+ return 0;
+}
+#endif