From 569d535b29255d860e7c3755069d60265a5e9085 Mon Sep 17 00:00:00 2001 From: Fumitoshi UKAI Date: Tue, 3 Dec 2002 15:35:09 +0000 Subject: [w3m-dev 03509] HTML parser * file.c (close_textarea): delete (HTMLtagproc1): rewrite delete HTML_EOL move HTML_LISTING, HTML_N_LISTING add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN add HTML_PLAINTEXT end_tag (HTMLlineproc0): s/str/line/ rewrite (completeHTMLstream): if necessary * fm.h (struct readbuffer): delete ignore_tag add end_tag (RB_XMPMODE): deleted (RB_LSTMODE): deleted (RB_SCRIPT): added (RB_STYLE): added (RB_*): renumber (R_ST_EOL): added (R_ST_*): renumber (ST_IS_TAG): check R_ST_EOL * form.c (form_fputs_decode): remove handling * frame.c (newFrame): remove_space() (CASE_TABLE_TAG): added (createFrameFile): rewrite * html.c (TagMAP): delete eol add pre_plain, /pre_plain * html.h (HTML_EOL): deleted (HTML_PRE_PLAIN): added (HTML_N_PRE_PLAIN): added * table.c (visible_length): rewrite (visible_length_plain): added (maximum_visible_length_plain): added (do_refill): R_ST_EOL (table_close_select): end_tag (table_close_textarea): end_tag (TAG_ACTION_PLAIN): added (feed_table_tag): rewrite (feed_table): rewrite * table.h (TBLM_*) reassign (struct table_mode): delete ignore_tag add end_tag * tagtable.tab (eol): deleted (pre_plain): added (/pre_plain): added From: Hironori SAKAMOTO --- file.c | 327 ++++++++++++++++++++++++++++------------------------------------- 1 file changed, 143 insertions(+), 184 deletions(-) (limited to 'file.c') diff --git a/file.c b/file.c index bba3d3f..30c8ebb 100644 --- a/file.c +++ b/file.c @@ -1,4 +1,4 @@ -/* $Id: file.c,v 1.140 2002/12/03 15:00:53 ukai Exp $ */ +/* $Id: file.c,v 1.141 2002/12/03 15:35:10 ukai Exp $ */ #include "fm.h" #include #include "myctype.h" @@ -34,7 +34,6 @@ static FILE *lessopen_stream(char *path); static Buffer *loadcmdout(char *cmd, Buffer *(*loadproc) (URLFile *, Buffer *), Buffer *defaultbuf); -static void close_textarea(struct html_feed_environ *h_env); static void addnewline(Buffer *buf, char *line, Lineprop *prop, #ifdef USE_ANSI_COLOR Linecolor *color, @@ -4064,10 +4063,6 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) flushline(h_env, obuf, envs[h_env->envc].indent, 1, h_env->limit); h_env->blank_lines = 0; return 1; - case HTML_EOL: - if ((obuf->flag & RB_PREMODE) && obuf->pos > envs[h_env->envc].indent) - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - return 1; case HTML_H: if (!(obuf->flag & (RB_PREMODE | RB_IGNORE_P))) { flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); @@ -4366,46 +4361,74 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) if (obuf->nobr_level == 0) obuf->flag &= ~RB_NOBR; return 0; - case HTML_LISTING: + case HTML_PRE_PLAIN: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= (RB_LSTMODE | RB_IGNORE_P); - /* istr = str; */ + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + } + obuf->flag |= (RB_PRE | RB_IGNORE_P); return 1; - case HTML_N_LISTING: + case HTML_N_PRE_PLAIN: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag &= ~RB_LSTMODE; + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + obuf->flag |= RB_IGNORE_P; + } + obuf->flag &= ~RB_PRE; return 1; + case HTML_LISTING: case HTML_XMP: + case HTML_PLAINTEXT: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= (RB_XMPMODE | RB_IGNORE_P); - /* istr = str; */ + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + } + obuf->flag |= (RB_PLAIN | RB_IGNORE_P); + switch (cmd) { + case HTML_LISTING: + obuf->end_tag = HTML_N_LISTING; + break; + case HTML_XMP: + obuf->end_tag = HTML_N_XMP; + break; + case HTML_PLAINTEXT: + obuf->end_tag = MAX_HTMLTAG; + break; + } return 1; + case HTML_N_LISTING: case HTML_N_XMP: CLOSE_P; - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag &= ~RB_XMPMODE; + if (!(obuf->flag & RB_IGNORE_P)) { + flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); + do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, + h_env->limit); + obuf->flag |= RB_IGNORE_P; + } + obuf->flag &= ~RB_PLAIN; + obuf->end_tag = 0; return 1; case HTML_SCRIPT: - obuf->flag |= RB_IGNORE; - obuf->ignore_tag = Strnew_charp(""); - return 1; - case HTML_N_SCRIPT: - /* should not be reached */ + obuf->flag |= RB_SCRIPT; + obuf->end_tag = HTML_N_SCRIPT; return 1; case HTML_STYLE: - obuf->flag |= RB_IGNORE; - obuf->ignore_tag = Strnew_charp(""); + obuf->flag |= RB_STYLE; + obuf->end_tag = HTML_N_STYLE; return 1; - case HTML_N_STYLE: - /* should not be reached */ + case HTML_N_SCRIPT: + obuf->flag &= ~RB_SCRIPT; + obuf->end_tag = 0; return 1; - case HTML_PLAINTEXT: - flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); - obuf->flag |= RB_PLAIN; - /* istr = str; */ + case HTML_N_STYLE: + obuf->flag &= ~RB_STYLE; + obuf->end_tag = 0; return 1; case HTML_A: if (obuf->anchor) @@ -4513,7 +4536,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) table_mode[obuf->table_level].indent_level = 0; table_mode[obuf->table_level].nobr_level = 0; table_mode[obuf->table_level].caption = 0; - table_mode[obuf->table_level].ignore_tag = NULL; + table_mode[obuf->table_level].end_tag = 0; /* HTML_UNKNOWN */ #ifndef TABLE_EXPAND tables[obuf->table_level]->total_width = width; #else @@ -4572,9 +4595,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) if (tmp) HTMLlineproc1(tmp->ptr, h_env); obuf->flag |= RB_INSELECT; + obuf->end_tag = HTML_N_SELECT; return 1; case HTML_N_SELECT: obuf->flag &= ~RB_INSELECT; + obuf->end_tag = 0; tmp = process_n_select(); if (tmp) HTMLlineproc1(tmp->ptr, h_env); @@ -4587,9 +4612,14 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) if (tmp) HTMLlineproc1(tmp->ptr, h_env); obuf->flag |= RB_INTXTA; + obuf->end_tag = HTML_N_TEXTAREA; return 1; case HTML_N_TEXTAREA: - close_textarea(h_env); + obuf->flag &= ~RB_INTXTA; + obuf->end_tag = 0; + tmp = process_n_textarea(); + if (tmp) + HTMLlineproc1(tmp->ptr, h_env); return 1; case HTML_ISINDEX: p = ""; @@ -5448,10 +5478,9 @@ table_width(struct html_feed_environ *h_env, int table_level) /* HTML processing first pass */ void -HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) +HTMLlineproc0(char *line, struct html_feed_environ *h_env, int internal) { Lineprop mode; - char *q; int cmd; struct readbuffer *obuf = h_env->obuf; int indent, delta; @@ -5467,25 +5496,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) (obuf->flag & RB_PREMODE) ? 'P' : ' ', (obuf->table_level >= 0) ? 'T' : ' ', (obuf->flag & RB_INTXTA) ? 'X' : ' ', - (obuf->flag & RB_IGNORE) ? 'I' : ' '); - fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", str, h_env->limit, + (obuf->flag & (RB_SCRIPT | RB_STYLE)) ? 'S' : ' '); + fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", line, h_env->limit, (unsigned long)h_env); fclose(f); } -#if 0 - /* comment processing */ - if (obuf->status == R_ST_CMNT || obuf->status == R_ST_NCMNT3 || - obuf->status == R_ST_IRRTAG) { - while (*str != '\0' && obuf->status != R_ST_NORMAL) { - next_status(*str, &obuf->status); - str++; - } - if (obuf->status != R_ST_NORMAL) - return; - } -#endif - tokbuf = Strnew(); table_start: @@ -5496,132 +5512,93 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) tbl_width = table_width(h_env, level); } - while (*str != '\0') { + while (*line != '\0') { + char *str, *p; int is_tag = FALSE; - int pre_mode = (obuf->table_level >= 0) ? - tbl_mode->pre_mode & TBLM_PLAIN : obuf->flag & RB_PLAINMODE; - - if (obuf->flag & RB_PLAIN) - goto read_as_plain; /* don't process tag */ + int pre_mode = (obuf->table_level >= 0) ? tbl_mode->pre_mode : + obuf->flag; + int end_tag = (obuf->table_level >= 0) ? tbl_mode->end_tag : + obuf->end_tag; - if (ST_IS_COMMENT(obuf->status)) { - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); - if (obuf->status != R_ST_NORMAL) - return; - if (pre_mode) { - is_tag = TRUE; - q = h_env->tagbuf->ptr; - goto read_as_pre_mode; - } - continue; - } - if (*str == '<' || ST_IS_TAG(obuf->status)) { + if (*line == '<' || obuf->status != R_ST_NORMAL) { /* * Tag processing */ - if (ST_IS_TAG(obuf->status)) { -/*** continuation of a tag ***/ - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); - } + if (obuf->status == R_ST_EOL) + obuf->status = R_ST_NORMAL; else { - if (!REALLY_THE_BEGINNING_OF_A_TAG(str)) { - /* this is NOT a beginning of a tag */ - obuf->status = R_ST_NORMAL; - if (pre_mode) - goto read_as_pre_mode; - HTMLlineproc1("<", h_env); - str++; - continue; - } - read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 0); - } -#if 0 - if (ST_IS_COMMENT(obuf->status)) { - if ((obuf->table_level >= 0) ? tbl_mode->pre_mode & TBLM_IGNORE - : obuf->flag & RB_IGNORE) - /* within ignored tag, such as * - * , don't process comment. */ - obuf->status = R_ST_NORMAL; - return; + read_token(h_env->tagbuf, &line, &obuf->status, + pre_mode & RB_PREMODE, obuf->status != R_ST_NORMAL); + if (obuf->status != R_ST_NORMAL) + return; } -#endif if (h_env->tagbuf->length == 0) continue; - if (obuf->status != R_ST_NORMAL) { - if (!pre_mode) { - if (Strlastchar(h_env->tagbuf) == '\n') - Strchop(h_env->tagbuf); - if (ST_IS_REAL_TAG(obuf->status)) - Strcat_char(h_env->tagbuf, ' '); + str = h_env->tagbuf->ptr; + if (*str == '<') { + if (str[1] && REALLY_THE_BEGINNING_OF_A_TAG(str)) + is_tag = TRUE; + else if (!(pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | + RB_SCRIPT | RB_STYLE))) { + line = Strnew_m_charp(str + 1, line, NULL)->ptr; + str = "<"; } - if ((obuf->table_level >= 0) - ? ((tbl_mode->pre_mode & TBLM_IGNORE) && - !TAG_IS(h_env->tagbuf->ptr, tbl_mode->ignore_tag->ptr, - tbl_mode->ignore_tag->length - 1)) - : ((obuf->flag & RB_IGNORE) && - !TAG_IS(h_env->tagbuf->ptr, obuf->ignore_tag->ptr, - obuf->ignore_tag->length - 1))) - /* within ignored tag, such as * - * , don't process tag. */ - obuf->status = R_ST_NORMAL; - continue; } - is_tag = TRUE; - q = h_env->tagbuf->ptr; + } + else { + read_token(tokbuf, &line, &obuf->status, pre_mode & RB_PREMODE, 0); + if (obuf->status != R_ST_NORMAL) /* R_ST_AMP ? */ + continue; + str = tokbuf->ptr; } - read_as_pre_mode: - if (obuf->flag & (RB_INTXTA | RB_INSELECT | RB_IGNORE)) { - cmd = HTML_UNKNOWN; - if (!is_tag) { - read_token(tokbuf, &str, &obuf->status, - (obuf->flag & RB_INTXTA) ? 1 : 0, 0); - if (obuf->status != R_ST_NORMAL) - continue; - q = tokbuf->ptr; - } - else { - char *p = q; - cmd = gethtmlcmd(&p); - } - - /* textarea */ - if (obuf->flag & RB_INTXTA) { - if (cmd == HTML_N_TEXTAREA) - goto proc_normal; - feed_textarea(q); + if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | RB_SCRIPT | + RB_STYLE)) { + if (is_tag) { + p = str; + if ((tag = parse_tag(&p, internal))) { + if (tag->tagid == end_tag || + (pre_mode & RB_INSELECT && tag->tagid == HTML_N_FORM)) + goto proc_normal; + } } - else if (obuf->flag & RB_INSELECT) { - if (cmd == HTML_N_SELECT || cmd == HTML_N_FORM) + /* select */ + if (pre_mode & RB_INSELECT) { + if (obuf->table_level >= 0) goto proc_normal; - feed_select(q); + feed_select(str); + continue; } - /* script */ - else if (obuf->flag & RB_IGNORE) { - if (TAG_IS(q, obuf->ignore_tag->ptr, - obuf->ignore_tag->length - 1)) { - obuf->flag &= ~RB_IGNORE; + if (is_tag) { + if (strncmp(str, "