From e718702d1281d1e92d162d9846081364ebb5f4df Mon Sep 17 00:00:00 2001 From: Fumitoshi UKAI Date: Thu, 23 Jan 2003 18:01:01 +0000 Subject: * etc.c (next_status): after = is R_ST_VALUE R_ST_VALUE > => R_ST_NORMAL R_ST_VALUE SP => R_ST_TAG (read_token): R_ST_VALUE (correct_irrtag): R_ST_VALUE * rm.h (R_ST_VALUE): added (ST_IS_REAL_TAG): rewrite (ST_IS_COMMENT): deleted (ST_IS_TAG): deleted * parsetagx.c (parse_tag): skip too long tagname skip too long attrname if attrvalue has quote char, need reconstruct if unknown attr, need reconstruct * table.c (visible_length): R_ST_VALUE From: Hironori SAKAMOTO --- ChangeLog | 19 ++++++++++++++++++- etc.c | 12 ++++++++++-- fm.h | 7 +++---- parsetagx.c | 22 +++++++++++++++------- table.c | 5 +++-- 5 files changed, 49 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index b9ad15b..17addf6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2003-01-24 Hironori SAKAMOTO + + * etc.c (next_status): after = is R_ST_VALUE + R_ST_VALUE > => R_ST_NORMAL + R_ST_VALUE SP => R_ST_TAG + (read_token): R_ST_VALUE + (correct_irrtag): R_ST_VALUE + * rm.h (R_ST_VALUE): added + (ST_IS_REAL_TAG): rewrite + (ST_IS_COMMENT): deleted + (ST_IS_TAG): deleted + * parsetagx.c (parse_tag): skip too long tagname + skip too long attrname + if attrvalue has quote char, need reconstruct + if unknown attr, need reconstruct + * table.c (visible_length): R_ST_VALUE + 2003-01-24 Hironori SAKAMOTO * [w3m-dev 03680] showProgress() on fast system @@ -6726,4 +6743,4 @@ a * [w3m-dev 03276] compile error on EWS4800 * release-0-2-1 * import w3m-0.2.1 -$Id: ChangeLog,v 1.707 2003/01/23 16:05:54 ukai Exp $ +$Id: ChangeLog,v 1.708 2003/01/23 18:01:01 ukai Exp $ diff --git a/etc.c b/etc.c index 93e411a..d300268 100644 --- a/etc.c +++ b/etc.c @@ -1,4 +1,4 @@ -/* $Id: etc.c,v 1.56 2003/01/23 16:02:15 ukai Exp $ */ +/* $Id: etc.c,v 1.57 2003/01/23 18:01:05 ukai Exp $ */ #include "fm.h" #include #include "myctype.h" @@ -636,7 +636,7 @@ next_status(char c, 
int *status) else if (c == '>') *status = R_ST_NORMAL; else - *status = R_ST_TAG; + *status = R_ST_VALUE; return 0; case R_ST_QUOTE: if (c == '\'') @@ -646,6 +646,12 @@ next_status(char c, int *status) if (c == '"') *status = R_ST_TAG; return 0; + case R_ST_VALUE: + if (c == '>') + *status = R_ST_NORMAL; + else if (IS_SPACE(c)) + *status = R_ST_TAG; + return 0; case R_ST_AMP: if (c == ';') { *status = R_ST_NORMAL; @@ -792,6 +798,7 @@ read_token(Str buf, char **instr, int *status, int pre, int append) case R_ST_EQL: case R_ST_QUOTE: case R_ST_DQUOTE: + case R_ST_VALUE: case R_ST_AMP: Strcat_char(buf, *p); break; @@ -838,6 +845,7 @@ correct_irrtag(int status) case R_ST_TAG: case R_ST_TAG0: case R_ST_EQL: /* required ">" */ + case R_ST_VALUE: c = '>'; break; case R_ST_QUOTE: diff --git a/fm.h b/fm.h index 91c7897..a4c135c 100644 --- a/fm.h +++ b/fm.h @@ -1,4 +1,4 @@ -/* $Id: fm.h,v 1.105 2003/01/22 16:16:20 ukai Exp $ */ +/* $Id: fm.h,v 1.106 2003/01/23 18:01:06 ukai Exp $ */ /* * w3m: WWW wo Miru utility * @@ -623,10 +623,9 @@ struct readbuffer { #define R_ST_NCMNT2 12 /* comment -- */ #define R_ST_NCMNT3 13 /* comment -- space */ #define R_ST_IRRTAG 14 /* within irregular tag */ +#define R_ST_VALUE 15 /* within tag attribute value */ -#define ST_IS_REAL_TAG(s) ((s)==R_ST_TAG||(s)==R_ST_TAG0||(s)==R_ST_EQL) -#define ST_IS_COMMENT(s) ((s)>=R_ST_CMNT1) -#define ST_IS_TAG(s) ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)&&(s)!=R_ST_EOL) +#define ST_IS_REAL_TAG(s) ((s)==R_ST_TAG||(s)==R_ST_TAG0||(s)==R_ST_EQL||(s)==R_ST_VALUE) /* is this '<' really means the beginning of a tag? 
*/ #define REALLY_THE_BEGINNING_OF_A_TAG(p) \ diff --git a/parsetagx.c b/parsetagx.c index de51801..bda980d 100644 --- a/parsetagx.c +++ b/parsetagx.c @@ -1,4 +1,4 @@ -/* $Id: parsetagx.c,v 1.12 2002/12/24 17:20:48 ukai Exp $ */ +/* $Id: parsetagx.c,v 1.13 2003/01/23 18:01:07 ukai Exp $ */ #include "fm.h" #include "myctype.h" #include "indep.h" @@ -132,6 +132,9 @@ parse_tag(char **s, int internal) q++; } *p = '\0'; + while (*q && !IS_SPACE(*q) && !(tagname[0] != '/' && *q == '/') && + *q != '>') + q++; tag_id = getHash_si(&tagtable, tagname, HTML_UNKNOWN); @@ -165,11 +168,9 @@ parse_tag(char **s, int internal) *(p++) = TOLOWER(*q); q++; } - if (q == p) { - q++; - continue; - } *p = '\0'; + while (*q && *q != '=' && !IS_SPACE(*q) && *q != '>') + q++; SKIP_BLANKS(q); if (*q == '=') { /* get value */ @@ -201,8 +202,12 @@ parse_tag(char **s, int internal) q++; } else if (*q) { - while (*q && !IS_SPACE(*q) && *q != '>') - Strcat_char(value, *q++); + while (*q && !IS_SPACE(*q) && *q != '>') { + Strcat_char(value, *q); + if (!tag->need_reconstruct && html_quote_char(*q)) + tag->need_reconstruct = TRUE; + q++; + } } } for (i = 0; i < nattr; i++) { @@ -227,6 +232,9 @@ parse_tag(char **s, int internal) else tag->value[i] = NULL; } + else { + tag->need_reconstruct = TRUE; + } } skip_parse_tagarg: diff --git a/table.c b/table.c index 606452d..3271f1c 100644 --- a/table.c +++ b/table.c @@ -1,4 +1,4 @@ -/* $Id: table.c,v 1.38 2003/01/20 15:30:21 ukai Exp $ */ +/* $Id: table.c,v 1.39 2003/01/23 18:01:08 ukai Exp $ */ /* * HTML table */ @@ -484,7 +484,8 @@ visible_length(char *str) Strcat_char(tagbuf, *str); } else if (status == R_ST_TAG || status == R_ST_DQUOTE - || status == R_ST_QUOTE || status == R_ST_EQL) { + || status == R_ST_QUOTE || status == R_ST_EQL + || status == R_ST_VALUE) { Strcat_char(tagbuf, *str); } else if (status == R_ST_AMP) { -- cgit v1.2.3