diff options
author | Fumitoshi UKAI <ukai@debian.or.jp> | 2002-12-03 15:35:09 +0000 |
---|---|---|
committer | Fumitoshi UKAI <ukai@debian.or.jp> | 2002-12-03 15:35:09 +0000 |
commit | 569d535b29255d860e7c3755069d60265a5e9085 (patch) | |
tree | b669930e719f182e8663bf2cd6afa62708183dc1 /frame.c | |
parent | update (diff) | |
download | w3m-569d535b29255d860e7c3755069d60265a5e9085.tar.gz w3m-569d535b29255d860e7c3755069d60265a5e9085.zip |
[w3m-dev 03509] HTML parser
* file.c (close_textarea): delete
(HTMLtagproc1): rewrite
delete HTML_EOL
move HTML_LISTING, HTML_N_LISTING
add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN
add HTML_PLAINTEXT
end_tag
(HTMLlineproc0): s/str/line/
rewrite
(completeHTMLstream): </textarea> if necessary
* fm.h (struct readbuffer): delete ignore_tag
add end_tag
(RB_XMPMODE): deleted
(RB_LSTMODE): deleted
(RB_SCRIPT): added
(RB_STYLE): added
(RB_*): renumber
(R_ST_EOL): added
(R_ST_*): renumber
(ST_IS_TAG): check R_ST_EOL
* form.c (form_fputs_decode): remove <eol> handling
* frame.c (newFrame): apply remove_space() to the SRC attribute value
(CASE_TABLE_TAG): added
(createFrameFile): rewrite
* html.c (TagMAP): delete eol
add pre_plain, /pre_plain
* html.h (HTML_EOL): deleted
(HTML_PRE_PLAIN): added
(HTML_N_PRE_PLAIN): added
* table.c (visible_length): rewrite
(visible_length_plain): added
(maximum_visible_length_plain): added
(do_refill): R_ST_EOL
(table_close_select): end_tag
(table_close_textarea): end_tag
(TAG_ACTION_PLAIN): added
(feed_table_tag): rewrite
(feed_table): rewrite
* table.h (TBLM_*): reassign
(struct table_mode): delete ignore_tag
add end_tag
* tagtable.tab (eol): deleted
(pre_plain): added
(/pre_plain): added
From: Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>
Diffstat (limited to '')
-rw-r--r-- | frame.c | 160 |
1 files changed, 132 insertions, 28 deletions
@@ -1,4 +1,4 @@ -/* $Id: frame.c,v 1.22 2002/11/28 16:00:34 ukai Exp $ */ +/* $Id: frame.c,v 1.23 2002/12/03 15:35:10 ukai Exp $ */ #include "fm.h" #include "parsetagx.h" #include "myctype.h" @@ -97,7 +97,7 @@ newFrame(struct parsed_tag *tag, Buffer *buf) body->baseURL = baseURL(buf); if (tag) { if (parsedtag_get_value(tag, ATTR_SRC, &p)) - body->url = url_quote_conv(p, buf->document_code); + body->url = url_quote_conv(remove_space(p), buf->document_code); if (parsedtag_get_value(tag, ATTR_NAME, &p) && *p != '_') body->name = url_quote_conv(p, buf->document_code); } @@ -412,6 +412,23 @@ frame_download_source(struct frame_body *b, ParsedURL *currentURL, return ret_frameset; } +#define CASE_TABLE_TAG \ + case HTML_TR:\ + case HTML_N_TR:\ + case HTML_TD:\ + case HTML_N_TD:\ + case HTML_TH:\ + case HTML_N_TH:\ + case HTML_THEAD:\ + case HTML_N_THEAD:\ + case HTML_TBODY:\ + case HTML_N_TBODY:\ + case HTML_TFOOT:\ + case HTML_N_TFOOT:\ + case HTML_COLGROUP:\ + case HTML_N_COLGROUP:\ + case HTML_COL + static int createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, int force_reload) @@ -467,8 +484,10 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, struct frameset *f_frameset; int i = c + r * f->col; char *p = ""; + int status = R_ST_NORMAL; Str tok = Strnew(); - int status; + int pre_mode = 0; + int end_tag = 0; frame = f->frame[i]; @@ -557,12 +576,13 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, break; } do { - status = R_ST_NORMAL; + int is_tag = FALSE; + char *q; + struct parsed_tag *tag; + do { if (*p == '\0') { Str tmp = StrmyUFgets(&f2); - if (tmp->length == 0 && status != R_ST_NORMAL) - tmp = correct_irrtag(status); if (tmp->length == 0) break; #ifdef JP_CHARSET @@ -573,21 +593,67 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, cleanup_line(tmp, HTML_MODE); p = tmp->ptr; } - if (status == R_ST_NORMAL) - read_token(tok, &p, &status, 1, 0); - else if 
(ST_IS_COMMENT(status)) - read_token(tok, &p, &status, 0, 0); - else - read_token(tok, &p, &status, 1, 1); + read_token(tok, &p, &status, 1, status != R_ST_NORMAL); } while (status != R_ST_NORMAL); if (tok->length == 0) continue; if (tok->ptr[0] == '<') { + is_tag = TRUE; + if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_SCRIPT | + RB_STYLE)) { + q = tok->ptr; + if ((tag = parse_tag(&q, FALSE)) && + tag->tagid == end_tag) { + if (pre_mode & RB_PLAIN) { + fputs("</PRE_PLAIN>", f1); + pre_mode = 0; + end_tag = 0; + goto token_end; + } + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + if (strncmp(tok->ptr, "<!--", 4) && + (q = strchr(tok->ptr + 1, '<'))) { + tok = Strnew_charp_n(tok->ptr, q - tok->ptr); + p = Strnew_m_charp(q, p, NULL)->ptr; + status = R_ST_NORMAL; + } + is_tag = FALSE; + } + else if (pre_mode & RB_INSELECT) { + q = tok->ptr; + if ((tag = parse_tag(&q, FALSE))) { + if ((tag->tagid == end_tag) || + (tag->tagid == HTML_N_FORM)) { + if (tag->tagid == HTML_N_FORM) + fputs("</SELECT>", f1); + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + if (t_stack) { + switch (tag->tagid) { + case HTML_TABLE: + case HTML_N_TABLE: + CASE_TABLE_TAG: + fputs("</SELECT>", f1); + pre_mode = 0; + end_tag = 0; + goto proc_normal; + } + } + } + } + } + + proc_normal: + if (is_tag) { char *q = tok->ptr; int j, a_target = 0; - struct parsed_tag *tag; ParsedURL url; if (!(tag = parse_tag(&q, FALSE))) @@ -603,7 +669,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, case HTML_BASE: /* "BASE" is prohibit tag */ if (parsedtag_get_value(tag, ATTR_HREF, &q)) { - q = url_quote_conv(q, code); + q = url_quote_conv(remove_space(q), code); parseURL(q, &base, NULL); } if (parsedtag_get_value(tag, ATTR_TARGET, &q)) { @@ -660,18 +726,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, goto token_end; } break; - case HTML_THEAD: - case HTML_N_THEAD: - case HTML_TBODY: - case HTML_N_TBODY: - case HTML_TFOOT: - case HTML_N_TFOOT: - 
case HTML_TD: - case HTML_N_TD: - case HTML_TR: - case HTML_N_TR: - case HTML_TH: - case HTML_N_TH: + CASE_TABLE_TAG: /* table_tags MUST be in table stack */ if (!t_stack) { Strshrinkfirst(tok, 1); @@ -682,6 +737,37 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, } break; + case HTML_SELECT: + pre_mode = RB_INSELECT; + end_tag = HTML_N_SELECT; + break; + case HTML_TEXTAREA: + pre_mode = RB_INTXTA; + end_tag = HTML_N_TEXTAREA; + break; + case HTML_SCRIPT: + pre_mode = RB_SCRIPT; + end_tag = HTML_N_SCRIPT; + break; + case HTML_STYLE: + pre_mode = RB_STYLE; + end_tag = HTML_N_STYLE; + break; + case HTML_LISTING: + pre_mode = RB_PLAIN; + end_tag = HTML_N_LISTING; + fputs("<PRE_PLAIN>", f1); + goto token_end; + case HTML_XMP: + pre_mode = RB_PLAIN; + end_tag = HTML_N_XMP; + fputs("<PRE_PLAIN>", f1); + goto token_end; + case HTML_PLAINTEXT: + pre_mode = RB_PLAIN; + end_tag = MAX_HTMLTAG; + fputs("<PRE_PLAIN>", f1); + goto token_end; default: break; } @@ -693,7 +779,8 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, if (!tag->value[j]) break; tag->value[j] = - url_quote_conv(tag->value[j], code); + url_quote_conv(remove_space(tag->value[j]), + code); parseURL2(tag->value[j], &url, &base); if (url.scheme == SCM_UNKNOWN || #ifndef USE_W3MMAILER @@ -748,11 +835,28 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level, Strfputs(tok, f1); } else { - Strfputs(tok, f1); + if (pre_mode & (RB_PLAIN | RB_INTXTA)) + fprintf(f1, "%s", html_quote(tok->ptr)); + else + Strfputs(tok, f1); } token_end: Strclear(tok); } while (*p != '\0' || !iseos(f2.stream)); + if (pre_mode & RB_PLAIN) + fputs("</PRE_PLAIN>\n", f1); + else if (pre_mode & RB_INTXTA) + fputs("</TEXTAREA></FORM>\n", f1); + else if (pre_mode & RB_INSELECT) + fputs("</SELECT></FORM>\n", f1); + else if (pre_mode & (RB_SCRIPT | RB_STYLE)) { + if (status != R_ST_NORMAL) + fputs(correct_irrtag(status)->ptr, f1); + if (pre_mode & RB_SCRIPT) + 
fputs("</SCRIPT>\n", f1); + else if (pre_mode & RB_STYLE) + fputs("</STYLE>\n", f1); + } while (t_stack--) fputs("</TABLE>\n", f1); UFclose(&f2); |