diff options
author | Fumitoshi UKAI <ukai@debian.or.jp> | 2002-01-10 15:39:21 +0000 |
---|---|---|
committer | Fumitoshi UKAI <ukai@debian.or.jp> | 2002-01-10 15:39:21 +0000 |
commit | e7c6a1c7f9170ec0d3c1c4581d1afc9984bcf733 (patch) | |
tree | 295a233ae838adbbee9e942504e9573cb179a8a4 | |
parent | [w3m-dev 02811] new regexp implementation (diff) | |
download | w3m-e7c6a1c7f9170ec0d3c1c4581d1afc9984bcf733.tar.gz w3m-e7c6a1c7f9170ec0d3c1c4581d1afc9984bcf733.zip |
[w3m-dev 02815]
From: aito@fw.ipsj.or.jp
Diffstat
-rw-r--r-- | ChangeLog | 16 | ||||
-rw-r--r-- | cookie.c | 8 | ||||
-rw-r--r-- | main.c | 14 | ||||
-rw-r--r-- | menu.c | 4 | ||||
-rw-r--r-- | regex.c | 59 | ||||
-rw-r--r-- | regex.h | 2 | ||||
-rw-r--r-- | url.c | 6 |
7 files changed, 63 insertions, 46 deletions
@@ -1,3 +1,17 @@ +2002-01-11 aito@fw.ipsj.or.jp + + * [w3m-dev 02815] + * cookie.c (domain_match): use new regex + * main.c (chkURLBuffer): use new regex + * main.c (chkNMIDBuffer): use new regex + * menu.c (menuForwardSearch): new regexMatch() + * regex.c (RegexMatch): negative length means whole string + * regex.c (RegexMatch): new regmatch() + * regex.c (regmatch_sub_anytime): add 5th arg `char *' + * regex.c (regmatch_iter): add 4th arg `char *' + * regex.c (regmatch): add 3rd arg `char *' + * url.c (openSocket): use new regex + 2002-01-10 aito@fw.ipsj.or.jp * [w3m-dev 02811] new regexp implementation @@ -1818,4 +1832,4 @@ * release-0-2-1 * import w3m-0.2.1 -$Id: ChangeLog,v 1.203 2002/01/10 04:55:06 ukai Exp $ +$Id: ChangeLog,v 1.204 2002/01/10 15:39:21 ukai Exp $ @@ -1,4 +1,4 @@ -/* $Id: cookie.c,v 1.6 2001/12/03 18:29:37 ukai Exp $ */ +/* $Id: cookie.c,v 1.7 2002/01/10 15:39:21 ukai Exp $ */ /* * References for version 0 cookie: @@ -45,9 +45,9 @@ domain_match(char *host, char *domain) /* [RFC 2109] s. 
2, "domain-match", case 1 * (both are IP and identical) */ - regexCompile("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", 0); - m0 = regexMatch(host, 0, 1); - m1 = regexMatch(domain, 0, 1); + regexCompile("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+", 0); + m0 = regexMatch(host, -1, 1); + m1 = regexMatch(domain, -1, 1); if (m0 && m1) { if (strcasecmp(host, domain) == 0) return host; @@ -1,4 +1,4 @@ -/* $Id: main.c,v 1.55 2002/01/04 19:14:36 ukai Exp $ */ +/* $Id: main.c,v 1.56 2002/01/10 15:39:21 ukai Exp $ */ #define MAINPROGRAM #include "fm.h" #include <signal.h> @@ -4147,10 +4147,7 @@ void chkURLBuffer(Buffer *buf) { static char *url_like_pat[] = { - "http://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]", -#ifdef USE_SSL - "https://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]", -#endif /* USE_SSL */ + "https?://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]", #ifdef USE_GOPHER "gopher://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./_]*", #endif /* USE_GOPHER */ @@ -4161,10 +4158,7 @@ chkURLBuffer(Buffer *buf) #endif /* USE_NNTP */ "mailto:[^<> ][^<> ]*@[a-zA-Z0-9][a-zA-Z0-9\\-\\._]*[a-zA-Z0-9]", #ifdef INET6 - "http://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*", -#ifdef USE_SSL - "https://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*", -#endif /* USE_SSL */ + "https?://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*", "ftp://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./=_+@#,\\$]*", #endif /* INET6 */ NULL @@ -4189,7 +4183,7 @@ void chkNMIDBuffer(Buffer *buf) { static char *url_like_pat[] = { - "<[^<> ][^<> ]*@[A-z0-9\\.\\-_][A-z0-9\\.\\-_]*>", + "<[^<> ][^<> ]*@[A-z0-9\\.\\-_]+>", NULL, }; int i; @@ -1,4 +1,4 @@ -/* $Id: menu.c,v 1.9 2001/12/10 17:02:44 ukai Exp $ */ +/* $Id: menu.c,v 1.10 2002/01/10 15:39:21 ukai Exp $ */ /* * w3m menu.c */ @@ -930,7 +930,7 
@@ menuForwardSearch(Menu *menu, char *str, int from) from = 0; for (i = from; i < menu->nitem; i++) if (menu->item[i].type != MENU_NOP && - regexMatch(menu->item[i].label, 0, 1) == 1) + regexMatch(menu->item[i].label, -1, 1) == 1) return i; return -1; } @@ -1,4 +1,4 @@ -/* $Id: regex.c,v 1.7 2002/01/10 04:55:07 ukai Exp $ */ +/* $Id: regex.c,v 1.8 2002/01/10 15:39:21 ukai Exp $ */ /* * regex: Regular expression pattern match library * @@ -66,7 +66,7 @@ static Regex DefaultRegex; #define CompiledRegex DefaultRegex.re #define Cstorage DefaultRegex.storage -static int regmatch(regexchar *, char *, int, char **); +static int regmatch(regexchar *, char *, char *, int, char **); static int regmatch1(regexchar *, longchar); static int matchWhich(longchar *, longchar); @@ -261,7 +261,7 @@ RegexMatch(Regex *re, char *str, int len, int firstp) if (str == NULL) return 0; - if (len == 0) + if (len < 0) len = strlen(str); re->position = NULL; ep = str + len; @@ -269,7 +269,7 @@ RegexMatch(Regex *re, char *str, int len, int firstp) lpos = NULL; re->lposition = NULL; for (r = re; r != NULL; r = r->alt_regex) { - switch (regmatch(r->re, p, firstp && (p == str), &lpos)) { + switch (regmatch(r->re, p, ep, firstp && (p == str), &lpos)) { case 1: /* matched */ re->position = p; if (re->lposition == NULL || re->lposition < lpos) @@ -341,11 +341,13 @@ struct MatchingContext2 { #define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum: -static int regmatch_iter(struct MatchingContext1 *, regexchar *, char *, int); +static int regmatch_iter(struct MatchingContext1 *, + regexchar *, char *, char *, int); static int regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, - regexchar * pat2, char *str, int iter_limit, int firstp) + regexchar * pat2, + char *str, char *end_p, int iter_limit, int firstp) { switch (c->label) { case 1: @@ -364,7 +366,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, c->firstp = firstp; for (;;) { 
c->ctx->label = 0; - while (regmatch_iter(c->ctx, c->regex->re, c->str, c->firstp)) { + while (regmatch_iter(c->ctx, c->regex->re, c->str, end_p, c->firstp)) { c->n_any = c->ctx->lastpos - c->str; if (c->n_any <= 0) continue; @@ -373,7 +375,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, c->lastpos = c->str + c->n_any; YIELD(1, c, 1); } - else if (regmatch(pat2, c->str + c->n_any, + else if (regmatch(pat2, c->str + c->n_any, end_p, c->firstp, &c->lastpos) == 1) { YIELD(1, c, 2); } @@ -381,8 +383,8 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, continue; c->ctx2->label = 0; while (regmatch_sub_anytime(c->ctx2, regex, pat2, - c->str + c->n_any, iter_limit - 1, - c->firstp)) { + c->str + c->n_any, end_p, + iter_limit - 1, c->firstp)) { c->lastpos = c->ctx2->lastpos; YIELD(1, c, 3); @@ -397,7 +399,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, static int regmatch_iter(struct MatchingContext1 *c, - regexchar * re, char *str, int firstp) + regexchar * re, char *str, char *end_p, int firstp) { switch (c->label) { case 1: @@ -418,10 +420,11 @@ regmatch_iter(struct MatchingContext1 *c, if (RE_MODE(re) == RE_ENDMARK) return 0; c->re = re; - c->end_p = str + strlen(str); c->firstp = firstp; c->str = str; + c->end_p = end_p; c->sub_ctx = NULL; + c->lastpos = NULL; while (RE_MODE(c->re) != RE_ENDMARK) { if (c->re->mode & (RE_ANYTIME | RE_OPT)) { if (c->re->mode & RE_ANYTIME) @@ -441,6 +444,7 @@ regmatch_iter(struct MatchingContext1 *c, c->re->p.sub, c->re + 1, c->str + c->n_any, + c->end_p, c->iter_limit, c->firstp)) { c->n_any = c->ctx2->lastpos - c->str; @@ -480,7 +484,7 @@ regmatch_iter(struct MatchingContext1 *c, c->lastpos = c->str + c->n_any; YIELD(1, c, 2); } - else if (regmatch(c->re + 1, c->str + c->n_any, + else if (regmatch(c->re + 1, c->str + c->n_any, c->end_p, c->firstp, &c->lastpos) == 1) { YIELD(1, c, 3); } @@ -488,8 +492,6 @@ regmatch_iter(struct MatchingContext1 *c, return 0; } /* regexp other than 
pat*, pat+ and pat? */ - if (c->str >= c->end_p) - return 0; switch (RE_MODE(c->re)) { case RE_BEGIN: if (!c->firstp) @@ -497,9 +499,15 @@ regmatch_iter(struct MatchingContext1 *c, c->re++; break; case RE_END: - c->lastpos = c->str; - c->re++; - YIELD((c->str >= c->end_p), c, 4); + if (c->str >= c->end_p) { + c->lastpos = c->str; + c->re++; + YIELD(1, c, 4); + } + else { + c->lastpos = NULL; + return 0; + } break; case RE_SUBREGEX: if (c->sub_ctx == NULL) { @@ -509,14 +517,14 @@ regmatch_iter(struct MatchingContext1 *c, for (;;) { c->sub_ctx->label = 0; while (regmatch_iter(c->sub_ctx, c->sub_regex->re, - c->str, c->firstp)) { + c->str, c->end_p, c->firstp)) { if (c->sub_ctx->lastpos != c->str) c->firstp = 0; if (RE_MODE(c->re + 1) == RE_ENDMARK) { c->lastpos = c->sub_ctx->lastpos; YIELD(1, c, 5); } - else if (regmatch(c->re + 1, c->sub_ctx->lastpos, + else if (regmatch(c->re + 1, c->sub_ctx->lastpos, c->end_p, c->firstp, &c->lastpos) == 1) { YIELD(1, c, 6); } @@ -557,14 +565,14 @@ regmatch_iter(struct MatchingContext1 *c, } static int -regmatch(regexchar * re, char *str, int firstp, char **lastpos) +regmatch(regexchar * re, char *str, char *end_p, int firstp, char **lastpos) { struct MatchingContext1 contx; *lastpos = NULL; contx.label = 0; - while (regmatch_iter(&contx, re, str, firstp)) { + while (regmatch_iter(&contx, re, str, end_p, firstp)) { #ifdef REGEX_DEBUG char *p; if (verbose) { @@ -756,14 +764,15 @@ main(int argc, char **argv) printf("Error on regexp /%s/: %s\n", buf, msg); exit(1); } - if (RegexMatch(re, buf2, 0, 1)) { - printf("/%s/\t%s\t", buf, buf2); + if (RegexMatch(re, buf2, -1, 1)) { + printf("/%s/\t\"%s\"\t\"", buf, buf2); MatchedPosition(re, &fpos, &epos); while (fpos < epos) putchar(*(fpos++)); + putchar('"'); } else - printf("/%s/\t%s\tno_match", buf, buf2); + printf("/%s/\t\"%s\"\tno_match", buf, buf2); putchar('\n'); } /* notreatched */ @@ -1,4 +1,4 @@ -/* $Id: regex.h,v 1.4 2002/01/10 04:55:07 ukai Exp $ */ +/* $Id: regex.h,v 1.5 
2002/01/10 15:39:21 ukai Exp $ */ #define REGEX_MAX 64 #define STORAGE_MAX 256 @@ -1,4 +1,4 @@ -/* $Id: url.c,v 1.29 2002/01/07 16:28:17 ukai Exp $ */ +/* $Id: url.c,v 1.30 2002/01/10 15:39:21 ukai Exp $ */ #include "fm.h" #include <sys/types.h> #include <sys/socket.h> @@ -626,8 +626,8 @@ openSocket(char *const hostname, #endif goto error; } - regexCompile("^[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*$", 0); - if (regexMatch(hostname, 0, 1)) { + regexCompile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$", 0); + if (regexMatch(hostname, -1, 1)) { sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4); adr = htonl((a1 << 24) | (a2 << 16) | (a3 << 8) | a4); bcopy((void *)&adr, (void *)&hostaddr.sin_addr, sizeof(long)); |