diff options
| -rw-r--r-- | ChangeLog | 50 | ||||
| -rw-r--r-- | file.c | 327 | ||||
| -rw-r--r-- | fm.h | 66 | ||||
| -rw-r--r-- | form.c | 12 | ||||
| -rw-r--r-- | frame.c | 160 | ||||
| -rw-r--r-- | html.c | 7 | ||||
| -rw-r--r-- | html.h | 9 | ||||
| -rw-r--r-- | table.c | 256 | ||||
| -rw-r--r-- | table.h | 30 | ||||
| -rw-r--r-- | tagtable.tab | 3 | 
10 files changed, 553 insertions, 367 deletions
@@ -1,3 +1,51 @@ +2002-12-04  Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp> + +	* [w3m-dev 03509] HTML parser +	* file.c (close_textarea): delete +		(HTMLtagproc1): rewrite  +				delete HTML_EOL +				move HTML_LISTING, HTML_N_LISTING +				add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN +				add HTML_PLAINTEXT +				end_tag +		(HTMLlineproc0): s/str/line/ +				rewrite +		(completeHTMLstream): </textarea> if necessary +	* fm.h (struct readbuffer): delete ignore_tag +				add end_tag +		(RB_XMPMODE): deleted +		(RB_LSTMODE): deleted +		(RB_SCRIPT): added +		(RB_STYLE): added +		(RB_*): renumber +		(R_ST_EOL): added +		(R_ST_*): renumber +		(ST_IS_TAG): check R_ST_EOL +	* form.c (form_fputs_decode): remove <eol> handling +	* frame.c (newFrame): remove_space() +		(CASE_TABLE_TAG): added +		(createFrameFile): rewrite +	* html.c (TagMAP): delete eol +			add pre_plain, /pre_plain +	* html.h (HTML_EOL): deleted +		(HTML_PRE_PLAIN): added +		(HTML_N_PRE_PLAIN): added +	* table.c (visible_length): rewrite +		(visible_length_plain): added +		(maximum_visible_length_plain): added +		(do_refill): R_ST_EOL +		(table_close_select): end_tag +		(table_close_textarea): end_tag +		(TAG_ACTION_PLAIN): added +		(feed_table_tag): rewrite +		(feed_table): rewrite +	* table.h (TBLM_*) reassign +		(struct table_mode): delete ignore_tag +				add end_tag +	* tagtable.tab (eol): deleted +			(pre_plain): added +			(/pre_plain): added +  2002-12-03  Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>  	* [w3m-dev 03505] Re: coredump when ssl error @@ -5303,4 +5351,4 @@ a	* [w3m-dev 03276] compile error on EWS4800  	* release-0-2-1  	* import w3m-0.2.1 -$Id: ChangeLog,v 1.575 2002/12/03 15:00:52 ukai Exp $ +$Id: ChangeLog,v 1.576 2002/12/03 15:35:09 ukai Exp $ @@ -1,4 +1,4 @@ -/* $Id: file.c,v 1.140 2002/12/03 15:00:53 ukai Exp $ */ +/* $Id: file.c,v 1.141 2002/12/03 15:35:10 ukai Exp $ */  #include "fm.h"  #include <sys/types.h>  #include "myctype.h" @@ -34,7 +34,6 @@ static FILE *lessopen_stream(char *path);  static Buffer *loadcmdout(char *cmd,  			  Buffer *(*loadproc) (URLFile *, Buffer *),  			  Buffer *defaultbuf); -static void close_textarea(struct html_feed_environ *h_env);  static void addnewline(Buffer *buf, char *line, Lineprop *prop,  #ifdef USE_ANSI_COLOR  		       Linecolor *color, @@ -4064,10 +4063,6 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)  	flushline(h_env, obuf, envs[h_env->envc].indent, 1, h_env->limit);  	h_env->blank_lines = 0;  	return 1; -    case HTML_EOL: -	if ((obuf->flag & RB_PREMODE) && obuf->pos > envs[h_env->envc].indent) -	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	return 1;      case HTML_H:  	if (!(obuf->flag & (RB_PREMODE | RB_IGNORE_P))) {  	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); @@ -4366,46 +4361,74 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)  	if (obuf->nobr_level == 0)  	    obuf->flag &= ~RB_NOBR;  	return 0; -    case HTML_LISTING: +    case HTML_PRE_PLAIN:  	CLOSE_P; -	flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	obuf->flag |= (RB_LSTMODE | RB_IGNORE_P); -	/* istr = str; */ +	if (!(obuf->flag & RB_IGNORE_P)) { +	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); +	    do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, +			 h_env->limit); +	} +	obuf->flag |= (RB_PRE | RB_IGNORE_P);  	return 1; -    case HTML_N_LISTING: +    case HTML_N_PRE_PLAIN:  	CLOSE_P; -	flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	obuf->flag &= ~RB_LSTMODE; +	if (!(obuf->flag & RB_IGNORE_P)) { +	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); +	    do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, +			 h_env->limit); +	    obuf->flag |= RB_IGNORE_P; +	} +	obuf->flag &= ~RB_PRE;  	return 1; +    case HTML_LISTING:      case HTML_XMP: +    case HTML_PLAINTEXT:  	CLOSE_P; -	flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	obuf->flag |= (RB_XMPMODE | RB_IGNORE_P); -	/* istr = str; */ +	if (!(obuf->flag & RB_IGNORE_P)) { +	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); +	    do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, +			 h_env->limit); +	} +	obuf->flag |= (RB_PLAIN | RB_IGNORE_P); +	switch (cmd) { +	case HTML_LISTING: +	    obuf->end_tag = HTML_N_LISTING; +	    break; +	case HTML_XMP: +	    obuf->end_tag = HTML_N_XMP; +	    break; +	case HTML_PLAINTEXT: +	    obuf->end_tag = MAX_HTMLTAG; +	    break; +	}  	return 1; +    case HTML_N_LISTING:      case HTML_N_XMP:  	CLOSE_P; -	flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	obuf->flag &= ~RB_XMPMODE; +	if (!(obuf->flag & RB_IGNORE_P)) { +	    flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); +	    do_blankline(h_env, obuf, envs[h_env->envc].indent, 0, +			 h_env->limit); +	    obuf->flag |= RB_IGNORE_P; +	} +	obuf->flag &= ~RB_PLAIN; +	obuf->end_tag = 0;  	return 1;      case HTML_SCRIPT: -	obuf->flag |= RB_IGNORE; -	obuf->ignore_tag = Strnew_charp("</script>"); -	return 1; -    case HTML_N_SCRIPT: -	/* should not be reached */ +	obuf->flag |= RB_SCRIPT; +	obuf->end_tag = HTML_N_SCRIPT;  	return 1;      case HTML_STYLE: -	obuf->flag |= RB_IGNORE; -	obuf->ignore_tag = Strnew_charp("</style>"); +	obuf->flag |= RB_STYLE; +	obuf->end_tag = HTML_N_STYLE;  	return 1; -    case HTML_N_STYLE: -	/* should not be reached */ +    case HTML_N_SCRIPT: +	obuf->flag &= ~RB_SCRIPT; +	obuf->end_tag = 0;  	return 1; -    case HTML_PLAINTEXT: -	flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit); -	obuf->flag |= RB_PLAIN; -	/* istr = str; */ +    case HTML_N_STYLE: +	obuf->flag &= ~RB_STYLE; +	obuf->end_tag = 0;  	return 1;      case HTML_A:  	if (obuf->anchor) @@ -4513,7 +4536,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)  	table_mode[obuf->table_level].indent_level = 0;  	table_mode[obuf->table_level].nobr_level = 0;  	table_mode[obuf->table_level].caption = 0; -	table_mode[obuf->table_level].ignore_tag = NULL; +	table_mode[obuf->table_level].end_tag = 0;	/* HTML_UNKNOWN */  #ifndef TABLE_EXPAND  	tables[obuf->table_level]->total_width = width;  #else @@ -4572,9 +4595,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)  	if (tmp)  	    HTMLlineproc1(tmp->ptr, h_env);  	obuf->flag |= RB_INSELECT; +	obuf->end_tag = HTML_N_SELECT;  	return 1;      case HTML_N_SELECT:  	obuf->flag &= ~RB_INSELECT; +	obuf->end_tag = 0;  	tmp = process_n_select();  	if (tmp)  	    HTMLlineproc1(tmp->ptr, h_env); @@ -4587,9 +4612,14 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)  	if (tmp)  	    HTMLlineproc1(tmp->ptr, h_env);  	obuf->flag |= RB_INTXTA; +	obuf->end_tag = HTML_N_TEXTAREA;  	return 1;      case HTML_N_TEXTAREA: -	close_textarea(h_env); +	obuf->flag &= ~RB_INTXTA; +	obuf->end_tag = 0; +	tmp = process_n_textarea(); +	if (tmp) +	    HTMLlineproc1(tmp->ptr, h_env);  	return 1;      case HTML_ISINDEX:  	p = ""; @@ -5448,10 +5478,9 @@ table_width(struct html_feed_environ *h_env, int table_level)  /* HTML processing first pass */  void -HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal) +HTMLlineproc0(char *line, struct html_feed_environ *h_env, int internal)  {      Lineprop mode; -    char *q;      int cmd;      struct readbuffer *obuf = h_env->obuf;      int indent, delta; @@ -5467,25 +5496,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  		(obuf->flag & RB_PREMODE) ? 'P' : ' ',  		(obuf->table_level >= 0) ? 'T' : ' ',  		(obuf->flag & RB_INTXTA) ? 'X' : ' ', -		(obuf->flag & RB_IGNORE) ? 'I' : ' '); -	fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", str, h_env->limit, +		(obuf->flag & (RB_SCRIPT | RB_STYLE)) ? 'S' : ' '); +	fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", line, h_env->limit,  		(unsigned long)h_env);  	fclose(f);      } -#if 0 -    /* comment processing */ -    if (obuf->status == R_ST_CMNT || obuf->status == R_ST_NCMNT3 || -	obuf->status == R_ST_IRRTAG) { -	while (*str != '\0' && obuf->status != R_ST_NORMAL) { -	    next_status(*str, &obuf->status); -	    str++; -	} -	if (obuf->status != R_ST_NORMAL) -	    return; -    } -#endif -      tokbuf = Strnew();    table_start: @@ -5496,132 +5512,93 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  	tbl_width = table_width(h_env, level);      } -    while (*str != '\0') { +    while (*line != '\0') { +	char *str, *p;  	int is_tag = FALSE; -	int pre_mode = (obuf->table_level >= 0) ? -	    tbl_mode->pre_mode & TBLM_PLAIN : obuf->flag & RB_PLAINMODE; - -	if (obuf->flag & RB_PLAIN) -	    goto read_as_plain;	/* don't process tag */ +	int pre_mode = (obuf->table_level >= 0) ? tbl_mode->pre_mode : +		       obuf->flag; +	int end_tag = (obuf->table_level >= 0) ? tbl_mode->end_tag : +		      obuf->end_tag; -	if (ST_IS_COMMENT(obuf->status)) { -	    read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); -	    if (obuf->status != R_ST_NORMAL) -		return; -	    if (pre_mode) { -		is_tag = TRUE; -		q = h_env->tagbuf->ptr; -		goto read_as_pre_mode; -	    } -	    continue; -	} -	if (*str == '<' || ST_IS_TAG(obuf->status)) { +	if (*line == '<' || obuf->status != R_ST_NORMAL) {  	    /*   	     * Tag processing  	     */ -	    if (ST_IS_TAG(obuf->status)) { -/*** continuation of a tag ***/ -		read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1); -	    } +	    if (obuf->status == R_ST_EOL) +		obuf->status = R_ST_NORMAL;  	    else { -		if (!REALLY_THE_BEGINNING_OF_A_TAG(str)) { -		    /* this is NOT a beginning of a tag */ -		    obuf->status = R_ST_NORMAL; -		    if (pre_mode) -			goto read_as_pre_mode; -		    HTMLlineproc1("<", h_env); -		    str++; -		    continue; -		} -		read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 0); -	    } -#if 0 -	    if (ST_IS_COMMENT(obuf->status)) { -		if ((obuf->table_level >= 0) ? tbl_mode->pre_mode & TBLM_IGNORE -		    : obuf->flag & RB_IGNORE) -		    /* within ignored tag, such as * -		     * <script>..</script>, don't process comment.  */ -		    obuf->status = R_ST_NORMAL; -		return; +	        read_token(h_env->tagbuf, &line, &obuf->status, +			   pre_mode & RB_PREMODE, obuf->status != R_ST_NORMAL); +		if (obuf->status != R_ST_NORMAL) +		    return;  	    } -#endif  	    if (h_env->tagbuf->length == 0)  		continue; -	    if (obuf->status != R_ST_NORMAL) { -		if (!pre_mode) { -		    if (Strlastchar(h_env->tagbuf) == '\n') -			Strchop(h_env->tagbuf); -		    if (ST_IS_REAL_TAG(obuf->status)) -			Strcat_char(h_env->tagbuf, ' '); +	    str = h_env->tagbuf->ptr; +	    if (*str == '<') { +		if (str[1] && REALLY_THE_BEGINNING_OF_A_TAG(str)) +		    is_tag = TRUE; +		else if (!(pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | +				       RB_SCRIPT | RB_STYLE))) { +		    line = Strnew_m_charp(str + 1, line, NULL)->ptr; +		    str = "<";   		} -		if ((obuf->table_level >= 0) -		    ? ((tbl_mode->pre_mode & TBLM_IGNORE) && -		       !TAG_IS(h_env->tagbuf->ptr, tbl_mode->ignore_tag->ptr, -			       tbl_mode->ignore_tag->length - 1)) -		    : ((obuf->flag & RB_IGNORE) && -		       !TAG_IS(h_env->tagbuf->ptr, obuf->ignore_tag->ptr, -			       obuf->ignore_tag->length - 1))) -		    /* within ignored tag, such as * -		     * <script>..</script>, don't process tag.  */ -		    obuf->status = R_ST_NORMAL; -		continue;  	    } -	    is_tag = TRUE; -	    q = h_env->tagbuf->ptr; +	} +	else { +	    read_token(tokbuf, &line, &obuf->status, pre_mode & RB_PREMODE, 0); +	    if (obuf->status != R_ST_NORMAL)	/* R_ST_AMP ? */ +		continue; +	    str = tokbuf->ptr;  	} -      read_as_pre_mode: -	if (obuf->flag & (RB_INTXTA | RB_INSELECT | RB_IGNORE)) { -	    cmd = HTML_UNKNOWN; -	    if (!is_tag) { -		read_token(tokbuf, &str, &obuf->status, -			   (obuf->flag & RB_INTXTA) ? 1 : 0, 0); -		if (obuf->status != R_ST_NORMAL) -		    continue; -		q = tokbuf->ptr; -	    } -	    else { -		char *p = q; -		cmd = gethtmlcmd(&p); -	    } - -	    /* textarea */ -	    if (obuf->flag & RB_INTXTA) { -		if (cmd == HTML_N_TEXTAREA) -		    goto proc_normal; -		feed_textarea(q); +	if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | RB_SCRIPT | +			RB_STYLE)) { +	    if (is_tag) { +		p = str; +	        if ((tag = parse_tag(&p, internal))) { +		    if (tag->tagid == end_tag || +			(pre_mode & RB_INSELECT && tag->tagid == HTML_N_FORM)) +			goto proc_normal; +		}  	    } -	    else if (obuf->flag & RB_INSELECT) { -		if (cmd == HTML_N_SELECT || cmd == HTML_N_FORM) +	    /* select */ +	    if (pre_mode & RB_INSELECT) { +		if (obuf->table_level >= 0)  		    goto proc_normal; -		feed_select(q); +		feed_select(str); +		continue;  	    } -	    /* script */ -	    else if (obuf->flag & RB_IGNORE) { -		if (TAG_IS(q, obuf->ignore_tag->ptr, -			   obuf->ignore_tag->length - 1)) { -		    obuf->flag &= ~RB_IGNORE; +	    if (is_tag) { +	        if (strncmp(str, "<!--", 4) && (p = strchr(str + 1, '<'))) { +		    str = Strnew_charp_n(str, p - str)->ptr; +		    line = Strnew_m_charp(p, line, NULL)->ptr;  		} +		is_tag = FALSE;  	    } -	    continue; +	    if (obuf->table_level >= 0) +		goto proc_normal; +	    /* textarea */ +	    if (pre_mode & RB_INTXTA) { +		feed_textarea(str); +	        continue; +	    } +	    /* script */ +	    if (pre_mode & RB_SCRIPT) +		continue; +	    /* style */ +	    if (pre_mode & RB_STYLE) +		continue;  	} +      proc_normal:  	if (obuf->table_level >= 0) {  	    /*   	     * within table: in <table>..</table>, all input tokens  	     * are fed to the table renderer, and then the renderer  	     * makes HTML output.  	     */ - -	    if (!is_tag) { -		read_token(tokbuf, &str, &obuf->status, -			   tbl_mode->pre_mode & TBLM_PREMODE, 0); -		if (obuf->status != R_ST_NORMAL) -		    continue; -		q = tokbuf->ptr; -	    } - -	    switch (feed_table(tbl, q, tbl_mode, tbl_width, internal)) { +	    switch (feed_table(tbl, str, tbl_mode, tbl_width, internal)) {  	    case 0:  		/* </table> tag */  		obuf->table_level--; @@ -5629,14 +5606,13 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  		    continue;  		end_table(tbl);  		if (obuf->table_level >= 0) { -		    Str tmp;  		    struct table *tbl0 = tables[obuf->table_level]; -		    tmp = Sprintf("<table_alt tid=%d>", tbl0->ntable); +		    str = Sprintf("<table_alt tid=%d>", tbl0->ntable)->ptr;  		    pushTable(tbl0, tbl);  		    tbl = tbl0;  		    tbl_mode = &table_mode[obuf->table_level];  		    tbl_width = table_width(h_env, obuf->table_level); -		    feed_table(tbl, tmp->ptr, tbl_mode, tbl_width, TRUE); +		    feed_table(tbl, str, tbl_mode, tbl_width, TRUE);  		    continue;  		    /* continue to the next */  		} @@ -5659,27 +5635,17 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  		continue;  	    case 1:  		/* <table> tag */ -		goto proc_normal; +		break;  	    default:  		continue;  	    }  	} -      proc_normal:  	if (is_tag) {  /*** Beginning of a new tag ***/ -	    if ((tag = parse_tag(&q, internal))) +	    if ((tag = parse_tag(&str, internal)))  		cmd = tag->tagid;  	    else -		cmd = HTML_UNKNOWN; -	    if (((obuf->flag & RB_XMPMODE) && cmd != HTML_N_XMP) || -		((obuf->flag & RB_LSTMODE) && cmd != HTML_N_LISTING)) { -		Str tmp = Strdup(h_env->tagbuf); -		Strcat_charp(tmp, str); -		str = tmp->ptr; -		goto read_as_plain; -	    } -	    if (cmd == HTML_UNKNOWN)  		continue;  	    /* process tags */  	    if (HTMLtagproc1(tag, h_env) == 0) { @@ -5701,12 +5667,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  		continue;  	} -      read_as_plain: +      while (*str) {  	mode = get_mctype(str);  	delta = get_mclen(mode);  	if (obuf->flag & (RB_SPECIAL & ~RB_NOBR)) {  	    char ch = *str; -	    if (!(obuf->flag & RB_PLAINMODE) && (*str == '&')) { +	    if (!(obuf->flag & RB_PLAIN) && (*str == '&')) {  		char *p = str;  		int ech = getescapechar(&p);  		if (ech == '\n' || ech == '\r') { @@ -5739,7 +5705,7 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  			 % Tabstop != 0);  		str++;  	    } -	    else if (obuf->flag & RB_PLAINMODE) { +	    else if (obuf->flag & RB_PLAIN) {  		char *p = html_quote_char(*str);  		if (p) {  		    push_charp(obuf, 1, p, PC_ASCII); @@ -5820,10 +5786,10 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)  #endif				/* FORMAT_NICE */  		HTMLlineproc1(line->ptr, h_env);  	    } +	  }  	}      } -    if (!(obuf->flag & (RB_PREMODE | RB_NOBR | RB_INTXTA | RB_INSELECT -			| RB_PLAINMODE | RB_IGNORE))) { +    if (!(obuf->flag & (RB_SPECIAL | RB_INTXTA | RB_INSELECT))) {  	char *tp;  	int i = 0; @@ -5849,17 +5815,6 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)      }  } -static void -close_textarea(struct html_feed_environ *h_env) -{ -    Str tmp; - -    h_env->obuf->flag &= ~RB_INTXTA; -    tmp = process_n_textarea(); -    if (tmp != NULL) -	HTMLlineproc1(tmp->ptr, h_env); -} -  extern char *NullLine;  extern Lineprop NullProp[]; @@ -6135,6 +6090,8 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf)  	push_tag(obuf, "</u>", HTML_N_U);  	obuf->in_under = 0;      } +    if (obuf->flag & RB_INTXTA) +	HTMLlineproc1("</textarea>", h_env);      /* for unbalanced select tag */      if (obuf->flag & RB_INSELECT)  	HTMLlineproc1("</select>", h_env); @@ -6142,7 +6099,7 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf)      /* for unbalanced table tag */      while (obuf->table_level >= 0) {  	table_mode[obuf->table_level].pre_mode -	    &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST); +	    &= ~(TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN);  	HTMLlineproc1("</table>", h_env);      }  } @@ -6351,8 +6308,10 @@ loadHTMLstream(URLFile *f, Buffer *newBuf, FILE * src, int internal)  #endif				/* USE_NNTP */  	HTMLlineproc0(lineBuf2->ptr, &htmlenv1, internal);      } -    if (obuf.status != R_ST_NORMAL) -	HTMLlineproc0(correct_irrtag(obuf.status)->ptr, &htmlenv1, internal); +    if (obuf.status != R_ST_NORMAL) { +	obuf.status = R_ST_EOL; +	HTMLlineproc0("\n", &htmlenv1, internal); +    }      obuf.status = R_ST_NORMAL;      completeHTMLstream(&htmlenv1, &obuf);      flushline(&htmlenv1, &obuf, 0, 2, htmlenv1.limit); @@ -1,4 +1,4 @@ -/* $Id: fm.h,v 1.90 2002/12/02 17:27:37 ukai Exp $ */ +/* $Id: fm.h,v 1.91 2002/12/03 15:35:10 ukai Exp $ */  /*    * w3m: WWW wo Miru utility   *  @@ -534,7 +534,7 @@ struct readbuffer {      long flag_stack[RB_STACK_SIZE];      int flag_sp;      int status; -    Str ignore_tag; +    unsigned char end_tag;      short table_level;      short nobr_level;      Str anchor; @@ -557,33 +557,30 @@ struct readbuffer {  #define in_stand fontstat[2]  #define RB_PRE		0x01 -#define RB_XMPMODE	0x02 -#define RB_LSTMODE	0x04 +#define RB_SCRIPT	0x02 +#define RB_STYLE	0x04  #define RB_PLAIN	0x08 -#define RB_LEFT		0x80000 -#define RB_CENTER	0x10 -#define RB_RIGHT	0x20 -#define RB_ALIGN	(RB_LEFT| RB_CENTER | RB_RIGHT) -#define RB_NOBR		0x40 -#define RB_P		0x80 -#define RB_PRE_INT	0x100 -#define RB_PREMODE	(RB_PRE | RB_PRE_INT) -#define RB_SPECIAL	(RB_PRE|RB_XMPMODE|RB_LSTMODE|RB_PLAIN|RB_NOBR|RB_PRE_INT) -#define RB_PLAINMODE	(RB_XMPMODE|RB_LSTMODE|RB_PLAIN) - -#define RB_IN_DT	0x200 -#define RB_INTXTA	0x400 -#define RB_INSELECT	0x800 -#define RB_IGNORE	0x1000 -#define RB_INSEL	0x2000 -#define RB_IGNORE_P	0x4000 -#define RB_TITLE	0x8000 -#define RB_NFLUSHED	0x10000 -#define RB_NOFRAMES	0x20000 -#define RB_INTABLE	0x40000 +#define RB_LEFT		0x10 +#define RB_CENTER	0x20 +#define RB_RIGHT	0x40 +#define RB_ALIGN	(RB_LEFT | RB_CENTER | RB_RIGHT) +#define RB_NOBR		0x80 +#define RB_P		0x100 +#define RB_PRE_INT	0x200 +#define RB_IN_DT	0x400 +#define RB_INTXTA	0x800 +#define RB_INSELECT	0x1000 +#define RB_IGNORE_P	0x2000 +#define RB_TITLE	0x4000 +#define RB_NFLUSHED	0x8000 +#define RB_NOFRAMES	0x10000 +#define RB_INTABLE	0x20000 +#define RB_PREMODE	(RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_INTXTA) +#define RB_SPECIAL	(RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_NOBR) +#define RB_PLAIN_PRE	0x40000  #ifdef FORMAT_NICE -#define RB_FILL		0x200000 +#define RB_FILL		0x80000  #endif				/* FORMAT_NICE */  #define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN) @@ -605,17 +602,18 @@ struct readbuffer {  #define R_ST_DQUOTE 4		/* within double quote */  #define R_ST_EQL    5		/* = */  #define R_ST_AMP    6		/* within ampersand quote */ -#define R_ST_CMNT1  7		/* <!  */ -#define R_ST_CMNT2  8		/* <!- */ -#define R_ST_CMNT   9		/* within comment */ -#define R_ST_NCMNT1 10		/* comment - */ -#define R_ST_NCMNT2 11		/* comment -- */ -#define R_ST_NCMNT3 12		/* comment -- space */ -#define R_ST_IRRTAG 13		/* within irregular tag */ +#define R_ST_EOL    7		/* end of file */ +#define R_ST_CMNT1  8		/* <!  */ +#define R_ST_CMNT2  9		/* <!- */ +#define R_ST_CMNT   10		/* within comment */ +#define R_ST_NCMNT1 11		/* comment - */ +#define R_ST_NCMNT2 12		/* comment -- */ +#define R_ST_NCMNT3 13		/* comment -- space */ +#define R_ST_IRRTAG 14		/* within irregular tag */  #define ST_IS_REAL_TAG(s)   ((s)==R_ST_TAG||(s)==R_ST_TAG0||(s)==R_ST_EQL)  #define ST_IS_COMMENT(s)    ((s)>=R_ST_CMNT1) -#define ST_IS_TAG(s)        ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)) +#define ST_IS_TAG(s)        ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)&&(s)!=R_ST_EOL)  /* is this '<' really means the beginning of a tag? */  #define REALLY_THE_BEGINNING_OF_A_TAG(p) \ @@ -1,4 +1,4 @@ -/* $Id: form.c,v 1.24 2002/11/15 16:48:14 ukai Exp $ */ +/* $Id: form.c,v 1.25 2002/12/03 15:35:10 ukai Exp $ */  /*    * HTML forms   */ @@ -430,16 +430,6 @@ form_fputs_decode(Str s, FILE * f)      for (p = s->ptr; *p;) {  	switch (*p) { -	case '<': -	    if (!strncasecmp(p, "<eol>", 5)) { -		Strcat_char(z, '\n'); -		p += 5; -	    } -	    else { -		Strcat_char(z, *p); -		p++; -	    } -	    break;  #if !defined( __CYGWIN__ ) && !defined( __EMX__ )  	case '\r':  	    if (*(p + 1) == '\n') @@ -1,4 +1,4 @@ -/* $Id: frame.c,v 1.22 2002/11/28 16:00:34 ukai Exp $ */ +/* $Id: frame.c,v 1.23 2002/12/03 15:35:10 ukai Exp $ */  #include "fm.h"  #include "parsetagx.h"  #include "myctype.h" @@ -97,7 +97,7 @@ newFrame(struct parsed_tag *tag, Buffer *buf)      body->baseURL = baseURL(buf);      if (tag) {  	if (parsedtag_get_value(tag, ATTR_SRC, &p)) -	    body->url = url_quote_conv(p, buf->document_code); +	    body->url = url_quote_conv(remove_space(p), buf->document_code);  	if (parsedtag_get_value(tag, ATTR_NAME, &p) && *p != '_')  	    body->name = url_quote_conv(p, buf->document_code);      } @@ -412,6 +412,23 @@ frame_download_source(struct frame_body *b, ParsedURL *currentURL,      return ret_frameset;  } +#define CASE_TABLE_TAG \ +	case HTML_TR:\ +	case HTML_N_TR:\ +	case HTML_TD:\ +	case HTML_N_TD:\ +	case HTML_TH:\ +	case HTML_N_TH:\ +	case HTML_THEAD:\ +	case HTML_N_THEAD:\ +	case HTML_TBODY:\ +	case HTML_N_TBODY:\ +	case HTML_TFOOT:\ +	case HTML_N_TFOOT:\ +	case HTML_COLGROUP:\ +	case HTML_N_COLGROUP:\ +	case HTML_COL +  static int  createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  		int force_reload) @@ -467,8 +484,10 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  	    struct frameset *f_frameset;  	    int i = c + r * f->col;  	    char *p = ""; +	    int status = R_ST_NORMAL;  	    Str tok = Strnew(); -	    int status; +	    int pre_mode = 0; +	    int end_tag = 0;  	    frame = f->frame[i]; @@ -557,12 +576,13 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  		    break;  		}  		do { -		    status = R_ST_NORMAL; +		    int is_tag = FALSE; +		    char *q; +		    struct parsed_tag *tag; +  		    do {  			if (*p == '\0') {  			    Str tmp = StrmyUFgets(&f2); -			    if (tmp->length == 0 && status != R_ST_NORMAL) -				tmp = correct_irrtag(status);  			    if (tmp->length == 0)  				break;  #ifdef JP_CHARSET @@ -573,21 +593,67 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  			    cleanup_line(tmp, HTML_MODE);  			    p = tmp->ptr;  			} -			if (status == R_ST_NORMAL) -			    read_token(tok, &p, &status, 1, 0); -			else if (ST_IS_COMMENT(status)) -			    read_token(tok, &p, &status, 0, 0); -			else -			    read_token(tok, &p, &status, 1, 1); +			read_token(tok, &p, &status, 1, status != R_ST_NORMAL);  		    } while (status != R_ST_NORMAL);  		    if (tok->length == 0)  			continue;  		    if (tok->ptr[0] == '<') { +			is_tag = TRUE; +			if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_SCRIPT | +					RB_STYLE)) { +			    q = tok->ptr; +			    if ((tag = parse_tag(&q, FALSE)) && +				tag->tagid == end_tag) { +				if (pre_mode & RB_PLAIN) { +				    fputs("</PRE_PLAIN>", f1); +				    pre_mode = 0; +				    end_tag = 0; +				    goto token_end; +				} +				pre_mode = 0; +				end_tag = 0; +				goto proc_normal; +			    } +			    if (strncmp(tok->ptr, "<!--", 4) && +				(q = strchr(tok->ptr + 1, '<'))) { +				tok = Strnew_charp_n(tok->ptr, q - tok->ptr); +				p = Strnew_m_charp(q, p, NULL)->ptr; +				status = R_ST_NORMAL; +			    } +			    is_tag = FALSE; +			} +			else if (pre_mode & RB_INSELECT) { +                            q = tok->ptr; +			    if ((tag = parse_tag(&q, FALSE))) { +				if ((tag->tagid == end_tag) || +				    (tag->tagid == HTML_N_FORM)) { +				    if (tag->tagid == HTML_N_FORM) +					fputs("</SELECT>", f1); +				    pre_mode = 0; +				    end_tag = 0; +				    goto proc_normal; +				} +				if (t_stack) { +				    switch (tag->tagid) { +				    case HTML_TABLE: +				    case HTML_N_TABLE: +				    CASE_TABLE_TAG: +					fputs("</SELECT>", f1); +					pre_mode = 0; +					end_tag = 0; +				        goto proc_normal; +				    } +				} +			    } +			} +		    } + +		  proc_normal: +		    if (is_tag) {  			char *q = tok->ptr;  			int j, a_target = 0; -			struct parsed_tag *tag;  			ParsedURL url;  			if (!(tag = parse_tag(&q, FALSE))) @@ -603,7 +669,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  			case HTML_BASE:  			    /* "BASE" is prohibit tag */  			    if (parsedtag_get_value(tag, ATTR_HREF, &q)) { -				q = url_quote_conv(q, code); +				q = url_quote_conv(remove_space(q), code);  				parseURL(q, &base, NULL);  			    }  			    if (parsedtag_get_value(tag, ATTR_TARGET, &q)) { @@ -660,18 +726,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  				goto token_end;  			    }  			    break; -			case HTML_THEAD: -			case HTML_N_THEAD: -			case HTML_TBODY: -			case HTML_N_TBODY: -			case HTML_TFOOT: -			case HTML_N_TFOOT: -			case HTML_TD: -			case HTML_N_TD: -			case HTML_TR: -			case HTML_N_TR: -			case HTML_TH: -			case HTML_N_TH: +			CASE_TABLE_TAG:  			    /* table_tags MUST be in table stack */  			    if (!t_stack) {  				Strshrinkfirst(tok, 1); @@ -682,6 +737,37 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  			    }  			    break; +			case HTML_SELECT: +			    pre_mode = RB_INSELECT; +			    end_tag = HTML_N_SELECT; +			    break; +			case HTML_TEXTAREA: +			    pre_mode = RB_INTXTA; +			    end_tag = HTML_N_TEXTAREA; +			    break; +			case HTML_SCRIPT: +			    pre_mode = RB_SCRIPT; +			    end_tag = HTML_N_SCRIPT; +			    break; +			case HTML_STYLE: +			    pre_mode = RB_STYLE; +			    end_tag = HTML_N_STYLE; +			    break; +			case HTML_LISTING: +			    pre_mode = RB_PLAIN; +			    end_tag = HTML_N_LISTING; +			    fputs("<PRE_PLAIN>", f1); +			    goto token_end; +			case HTML_XMP: +			    pre_mode = RB_PLAIN; +			    end_tag = HTML_N_XMP; +			    fputs("<PRE_PLAIN>", f1); +			    goto token_end; +			case HTML_PLAINTEXT: +			    pre_mode = RB_PLAIN; +			    end_tag = MAX_HTMLTAG; +			    fputs("<PRE_PLAIN>", f1); +			    goto token_end;  			default:  			    break;  			} @@ -693,7 +779,8 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  				if (!tag->value[j])  				    break;  				tag->value[j] = -				    url_quote_conv(tag->value[j], code); +				    url_quote_conv(remove_space(tag->value[j]), +						   code);  				parseURL2(tag->value[j], &url, &base);  				if (url.scheme == SCM_UNKNOWN ||  #ifndef USE_W3MMAILER @@ -748,11 +835,28 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,  			    Strfputs(tok, f1);  		    }  		    else { -			Strfputs(tok, f1); +			if (pre_mode & (RB_PLAIN | RB_INTXTA)) +			    fprintf(f1, "%s", html_quote(tok->ptr)); +			else +			    Strfputs(tok, f1);  		    }  		  token_end:  		    Strclear(tok);  		} while (*p != '\0' || !iseos(f2.stream)); +		if (pre_mode & RB_PLAIN) +		    fputs("</PRE_PLAIN>\n", f1); +		else if (pre_mode & RB_INTXTA) +		    fputs("</TEXTAREA></FORM>\n", f1); +		else if (pre_mode & RB_INSELECT) +		    fputs("</SELECT></FORM>\n", f1); +		else if (pre_mode & (RB_SCRIPT | RB_STYLE)) { +		    if (status != R_ST_NORMAL) +			fputs(correct_irrtag(status)->ptr, f1); +		    if (pre_mode & RB_SCRIPT) +			fputs("</SCRIPT>\n", f1); +		    else if (pre_mode & RB_STYLE) +			fputs("</STYLE>\n", f1); +		}  		while (t_stack--)  		    fputs("</TABLE>\n", f1);  		UFclose(&f2); @@ -1,4 +1,4 @@ -/* $Id: html.c,v 1.17 2002/12/02 17:55:49 ukai Exp $ */ +/* $Id: html.c,v 1.18 2002/12/03 15:35:10 ukai Exp $ */  #include "html.h"  /* Define HTML Tag Infomation Table */ @@ -248,8 +248,9 @@ TagInfo TagMAP[MAX_HTMLTAG] = {      {"/input_alt", NULL, 0, TFLG_INT | TFLG_END},	/* 123 HTML_N_INPUT_ALT */      {"img_alt", ALST_IMG_ALT, MAXA_IMG_ALT, TFLG_INT},	/* 124 HTML_IMG_ALT     */      {"/img_alt", NULL, 0, TFLG_INT | TFLG_END},	/* 125 HTML_N_IMG_ALT   */ -    {"eol", NULL, 0, TFLG_INT},	/* 126 HTML_EOL         */ -    {" ", ALST_NOP, MAXA_NOP, TFLG_INT},	/* 127 HTML_NOP         */ +    {" ", ALST_NOP, MAXA_NOP, TFLG_INT},	/* 126 HTML_NOP         */ +    {"pre_plain", NULL, 0, TFLG_INT},	/* 127 HTML_PRE_PLAIN         */ +    {"/pre_plain", NULL, 0, TFLG_INT | TFLG_END},	/* 128 HTML_N_PRE_PLAIN         */  };  TagAttrInfo AttrMAP[MAX_TAGATTR] = { @@ -1,4 +1,4 @@ -/* $Id: html.h,v 1.11 2002/12/02 17:27:39 ukai Exp $ */ +/* $Id: html.h,v 1.12 2002/12/03 15:35:11 ukai Exp $ */  #ifndef _HTML_H  #define _HTML_H  #ifdef USE_SSL @@ -213,10 +213,11 @@ typedef struct {  #define HTML_N_INPUT_ALT    123  #define HTML_IMG_ALT        124  #define HTML_N_IMG_ALT      125 -#define HTML_EOL            126 -#define HTML_NOP	    127 +#define HTML_NOP	    126 +#define HTML_PRE_PLAIN	    127 +#define HTML_N_PRE_PLAIN    128 -#define MAX_HTMLTAG	    128 +#define MAX_HTMLTAG	    129  /* Tag attribute */ @@ -1,4 +1,4 @@ -/* $Id: table.c,v 1.31 2002/11/25 16:39:53 ukai Exp $ */ +/* $Id: table.c,v 1.32 2002/12/03 15:35:11 ukai Exp $ */  /*    * HTML table   */ @@ -490,11 +490,11 @@ visible_length(char *str)  	else if (status == R_ST_AMP) {  	    if (prev_status == R_ST_NORMAL) {  		Strclear(tagbuf); +		len--;  		amp_len = 0;  	    }  	    else {  		Strcat_char(tagbuf, *str); -		len++;  		amp_len++;  	    }  	} @@ -502,10 +502,13 @@ visible_length(char *str)  	    Strcat_char(tagbuf, *str);  	    r2 = tagbuf->ptr;  	    t = getescapecmd(&r2); -	    len += strlen(t) - 1 - amp_len; -	    if (*r2 != '\0') { -		str -= strlen(r2); +	    if (!*r2 && (*t == '\r' || *t == '\n')) { +		if (len > max_len) +		    max_len = len; +		len = 0;  	    } +	    else +		len += strlen(t) + strlen(r2);  	}  	else if (status == R_ST_NORMAL && ST_IS_REAL_TAG(prev_status)) {  	    ; @@ -516,22 +519,42 @@ visible_length(char *str)  		len++;  	    } while ((visible_length_offset + len) % Tabstop != 0);  	} -	else if (*str == '\n' || *str == '\r') { +	else if (*str == '\r' || *str == '\n') { +	    len--;  	    if (len > max_len)  		max_len = len;  	    len = 0;  	} -	else if (*str == '\n' || *str == '\r') -	    len = 0;  	str++;      }      if (status == R_ST_AMP) {  	r2 = tagbuf->ptr;  	t = getescapecmd(&r2); -	len += strlen(t) - 1 - amp_len; -	if (*r2 != '\0') { -	    len += strlen(r2); +	if (*t != '\r' && *t != '\n') +	    len += strlen(t) + strlen(r2); +    } +    return len > max_len ? len : max_len; +} + +int +visible_length_plain(char *str) +{ +    int len = 0, max_len = 0; + +    while (*str) { +	if (*str == '\t') { +	    do { +		len++; +	    } while ((visible_length_offset + len) % Tabstop != 0); +	} +	else if (*str == '\r' || *str == '\n') { +	    if (len > max_len) +		max_len = len; +	    len = 0;  	} +	else +	    len++; +	str++;      }      return len > max_len ? len : max_len;  } @@ -558,6 +581,28 @@ maximum_visible_length(char *str)      return maxlen;  } +int +maximum_visible_length_plain(char *str) +{ +    int maxlen, len; + +    visible_length_offset = 0; +    maxlen = visible_length_plain(str); + +    if (!strchr(str, '\t')) +	return maxlen; + +    for (visible_length_offset = 1; visible_length_offset < Tabstop; +	 visible_length_offset++) { +	len = visible_length_plain(str); +	if (maxlen < len) { +	    maxlen = len; +	    break; +	} +    } +    return maxlen; +} +  void  align(TextLine *lbuf, int width, int mode)  { @@ -810,6 +855,10 @@ do_refill(struct table *tbl, int row, int col, int maxlimit)  	else  	    HTMLlineproc1(l->ptr, &h_env);      } +    if (obuf.status != R_ST_NORMAL) { +	obuf.status = R_ST_EOL; +	HTMLlineproc1("\n", &h_env); +    }      completeHTMLstream(&h_env, &obuf);      flushline(&h_env, &obuf, 0, 2, h_env.limit);      if (tbl->border_mode == BORDER_NONE) { @@ -2361,6 +2410,7 @@ table_close_select(struct table *tbl, struct table_mode *mode, int width)  {      Str tmp = process_n_select();      mode->pre_mode &= ~TBLM_INSELECT; +    mode->end_tag = 0;      feed_table1(tbl, tmp, mode, width);  } @@ -2369,6 +2419,7 @@ table_close_textarea(struct table *tbl, struct table_mode *mode, int width)  {      Str tmp = process_n_textarea();      mode->pre_mode &= ~TBLM_INTXTA; +    mode->end_tag = 0;      feed_table1(tbl, tmp, mode, width);  } @@ -2394,6 +2445,7 @@ table_close_anchor0(struct table *tbl, struct table_mode *mode)  #define TAG_ACTION_FEED 1  #define TAG_ACTION_TABLE 2  #define TAG_ACTION_N_TABLE 3 +#define TAG_ACTION_PLAIN 4  #define CASE_TABLE_TAG \  	case HTML_TABLE:\ @@ -2429,53 +2481,62 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,      cmd = tag->tagid; -    if (mode->pre_mode & TBLM_IGNORE) { -	switch (cmd) { -	case HTML_N_STYLE: -	    mode->pre_mode &= ~TBLM_STYLE; +    if (mode->pre_mode & TBLM_PLAIN) { +	if (mode->end_tag == cmd) { +	    mode->pre_mode &= ~TBLM_PLAIN; +	    mode->end_tag = 0; +	    feed_table_block_tag(tbl, line, mode, 0, cmd);  	    return TAG_ACTION_NONE; -	case HTML_N_SCRIPT: +	} +	return TAG_ACTION_PLAIN; +    } +    if (mode->pre_mode & TBLM_INTXTA) { +	if (mode->end_tag == cmd) { +	    table_close_textarea(tbl, mode, width); +	    return TAG_ACTION_NONE; +	} +	return TAG_ACTION_FEED; +    } +    if (mode->pre_mode & TBLM_SCRIPT) { +	if (mode->end_tag == cmd) {  	    mode->pre_mode &= ~TBLM_SCRIPT; +	    mode->end_tag = 0;  	    return TAG_ACTION_NONE; -	default: +	} +	return TAG_ACTION_PLAIN; +    } +    if (mode->pre_mode & TBLM_STYLE) { +	if (mode->end_tag == cmd) { +	    mode->pre_mode &= ~TBLM_STYLE; +	    mode->end_tag = 0;  	    return TAG_ACTION_NONE;  	} +	return TAG_ACTION_PLAIN;      } - -    switch (cmd) { -      CASE_TABLE_TAG: -	if (mode->caption) -	    mode->caption = 0; -	if (mode->pre_mode & (TBLM_IGNORE | TBLM_XMP | TBLM_LST)) -	    mode->pre_mode &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST); -	if (mode->pre_mode & TBLM_INTXTA) -	    table_close_textarea(tbl, mode, width); -	if (mode->pre_mode & TBLM_INSELECT) +    /* failsafe: a tag other than <option></option>and </select> in * +     * <select> environment is regarded as the end of <select>. */ +    if (mode->pre_mode & TBLM_INSELECT) { +	switch (cmd) { +	CASE_TABLE_TAG: +	case HTML_N_FORM: +	case HTML_N_SELECT:	/* mode->end_tag */  	    table_close_select(tbl, mode, width); +	    break; +	default: +	    return TAG_ACTION_FEED; +	}      } -      if (mode->caption) {  	switch (cmd) { +	CASE_TABLE_TAG:  	case HTML_N_CAPTION:  	    mode->caption = 0; -	    return TAG_ACTION_NONE; +	    break;  	default:  	    return TAG_ACTION_FEED;  	}      } -    /* failsafe: a tag other than <option></option>and </select> in * -     * <select> environment is regarded as the end of <select>. */ -    if (mode->pre_mode & TBLM_INSELECT && cmd == HTML_N_FORM) { -	table_close_select(tbl, mode, width); -    } - -    if ((mode->pre_mode & TBLM_INSELECT && cmd != HTML_N_SELECT) || -	(mode->pre_mode & TBLM_INTXTA && cmd != HTML_N_TEXTAREA) || -	(mode->pre_mode & TBLM_XMP && cmd != HTML_N_XMP) || -	(mode->pre_mode & TBLM_LST && cmd != HTML_N_LISTING)) -	return TAG_ACTION_FEED; -      if (mode->pre_mode & TBLM_PRE) {  	switch (cmd) {  	case HTML_NOBR: @@ -2742,33 +2803,33 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,      case HTML_LI:      case HTML_PRE:      case HTML_N_PRE: +    case HTML_HR:      case HTML_LISTING: -    case HTML_N_LISTING:      case HTML_XMP: -    case HTML_N_XMP:      case HTML_PLAINTEXT: +    case HTML_PRE_PLAIN: +    case HTML_N_PRE_PLAIN:  	feed_table_block_tag(tbl, line, mode, 0, cmd);  	switch (cmd) {  	case HTML_PRE: +	case HTML_PRE_PLAIN:  	    mode->pre_mode |= TBLM_PRE;  	    break;  	case HTML_N_PRE: +	case HTML_N_PRE_PLAIN:  	    mode->pre_mode &= ~TBLM_PRE;  	    break;  	case HTML_LISTING: -	    mode->pre_mode |= TBLM_LST; -	    break; -	case HTML_N_LISTING: -	    mode->pre_mode &= ~TBLM_LST; +	    mode->pre_mode |= TBLM_PLAIN; +	    mode->end_tag = HTML_N_LISTING;  	    break;  	case HTML_XMP: -	    mode->pre_mode |= TBLM_XMP; -	    break; -	case HTML_N_XMP: -	    mode->pre_mode &= ~TBLM_XMP; +	    mode->pre_mode |= TBLM_PLAIN; +	    mode->end_tag = HTML_N_XMP;  	    break;  	case HTML_PLAINTEXT: -	    mode->pre_mode |= TBLM_PLAINTEXT; +	    mode->pre_mode |= TBLM_PLAIN; +	    mode->end_tag = MAX_HTMLTAG;  	    break;  	}  	break; @@ -2857,9 +2918,7 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,  	if (tmp)  	    feed_table1(tbl, tmp, mode, width);  	mode->pre_mode |= TBLM_INSELECT; -	break; -    case HTML_N_SELECT: -	table_close_select(tbl, mode, width); +	mode->end_tag = HTML_N_SELECT;  	break;      case HTML_OPTION:  	/* nothing */ @@ -2880,9 +2939,7 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,  	if (tmp)  	    feed_table1(tbl, tmp, mode, width);  	mode->pre_mode |= TBLM_INTXTA; -	break; -    case HTML_N_TEXTAREA: -	table_close_textarea(tbl, mode, width); +	mode->end_tag = HTML_N_TEXTAREA;  	break;      case HTML_A:  	table_close_anchor0(tbl, mode); @@ -2969,11 +3026,11 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,  	break;      case HTML_SCRIPT:  	mode->pre_mode |= TBLM_SCRIPT; -	mode->ignore_tag = Strnew_charp("</script>"); +	mode->end_tag = HTML_N_SCRIPT;  	break;      case HTML_STYLE:  	mode->pre_mode |= TBLM_STYLE; -	mode->ignore_tag = Strnew_charp("</style>"); +	mode->end_tag = HTML_N_STYLE;  	break;      case HTML_N_A:  	table_close_anchor0(tbl, mode); @@ -2994,7 +3051,6 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,      case HTML_TEXTAREA_INT:      case HTML_N_TEXTAREA_INT:      case HTML_IMG_ALT: -    case HTML_EOL:      case HTML_RULE:      case HTML_N_RULE:      default: @@ -3014,25 +3070,29 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,      Str tmp;      struct table_linfo *linfo = &tbl->linfo; -    if (*line == '<') { -	int action; +    if (*line == '<' && line[1] && REALLY_THE_BEGINNING_OF_A_TAG(line)) {  	struct parsed_tag *tag;  	p = line;  	tag = parse_tag(&p, internal);  	if (tag) { -	    action = feed_table_tag(tbl, line, mode, width, tag); -	    if (action == TAG_ACTION_NONE) +	    switch (feed_table_tag(tbl, line, mode, width, tag)) { +	    case TAG_ACTION_NONE:  		return -1; -	    else if (action == TAG_ACTION_N_TABLE) +	    case TAG_ACTION_N_TABLE:  		return 0; -	    else if (action == TAG_ACTION_TABLE) { +	    case TAG_ACTION_TABLE:  		return 1; +	    case TAG_ACTION_PLAIN: +		break; +	    case TAG_ACTION_FEED: +	    default: +		if (parsedtag_need_reconstruct(tag)) +		    line = parsedtag2str(tag)->ptr;  	    } -	    else if (parsedtag_need_reconstruct(tag)) -		line = parsedtag2str(tag)->ptr;  	}  	else { -	    if (!(mode->pre_mode & TBLM_PLAIN)) +	    if (!(mode->pre_mode & (TBLM_PLAIN | TBLM_INTXTA | TBLM_INSELECT | +				    TBLM_SCRIPT | TBLM_STYLE)))  		return -1;  	}      } @@ -3040,7 +3100,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,  	Strcat_charp(tbl->caption, line);  	return -1;      } -    if (mode->pre_mode & TBLM_IGNORE) +    if (mode->pre_mode & TBLM_SCRIPT) +	return -1; +    if (mode->pre_mode & TBLM_STYLE)  	return -1;      if (mode->pre_mode & TBLM_INTXTA) {  	feed_textarea(line); @@ -3100,7 +3162,7 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,  	}  	line = tmp->ptr;      } -    if (!(mode->pre_mode & TBLM_SPECIAL)) { +    if (!(mode->pre_mode & (TBLM_SPECIAL & ~TBLM_NOBR))) {  	if (!(tbl->flag & TBL_IN_COL) || linfo->prev_spaces != 0)  	    while (IS_SPACE(*line))  		line++; @@ -3114,25 +3176,51 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,  	i = skip_space(tbl, line, linfo, !(mode->pre_mode & TBLM_NOBR));  	addcontentssize(tbl, visible_length(line) - i);  	setwidth(tbl, mode); +	pushdata(tbl, tbl->row, tbl->col, line);      } -    else { -	/* <pre> mode or something like it */ +    else if (mode->pre_mode & TBLM_PRE_INT) {  	check_rowcol(tbl, mode); -	if (mode->pre_mode & TBLM_PRE_INT && mode->nobr_offset < 0) +	if (mode->nobr_offset < 0)  	    mode->nobr_offset = tbl->tabcontentssize; -	if (mode->pre_mode & TBLM_PLAIN) -	    i = strlen(line); -	else -	    i = maximum_visible_length(line); -	addcontentssize(tbl, i); +	addcontentssize(tbl, maximum_visible_length(line));  	setwidth(tbl, mode); -	if (!(mode->pre_mode & TBLM_PRE_INT)) { -	    p = line + strlen(line) - 1; -	    if (*p == '\r' || *p == '\n') +	pushdata(tbl, tbl->row, tbl->col, line); +    } +    else { +	/* <pre> mode or something like it */ +	check_rowcol(tbl, mode); +	while (*line) { +	    int nl = FALSE; +	    if ((p = strchr(line, '\r')) || (p = strchr(line, '\n'))) { +		if (*p == '\r' && p[1] == '\n') +		    p++; +		if (p[1]) { +		    p++; +		    tmp = Strnew_charp_n(line, p - line); +		    line = p; +		    p = tmp->ptr; +		} +		else { +		    p = line; +		    line = ""; +		} +		nl = TRUE; +	    } +	    else { +		p = line; +		line = ""; +	    } +	    if (mode->pre_mode & TBLM_PLAIN) +	        i = maximum_visible_length_plain(p); +	    else +	        i = maximum_visible_length(p); +	    addcontentssize(tbl, i); +	    setwidth(tbl, mode); +	    if (nl)  		clearcontentssize(tbl, mode); +	    pushdata(tbl, tbl->row, tbl->col, p);  	}      } -    pushdata(tbl, tbl->row, tbl->col, line);      return -1;  } @@ -1,4 +1,4 @@ -/* $Id: table.h,v 1.8 2002/11/25 16:39:53 ukai Exp $ */ +/* $Id: table.h,v 1.9 2002/12/03 15:35:11 ukai Exp $ */  #if (defined(MESCHACH) && !defined(MATRIX))  #define MATRIX  #endif				/* (defined(MESCHACH) && !defined(MATRIX)) */ @@ -114,21 +114,17 @@ struct table {      int sloppy_width;  }; -#define TBLM_PRE 1 -#define TBLM_NOBR 2 -#define TBLM_XMP 4 -#define TBLM_LST 8 -#define TBLM_PLAINTEXT 16 -#define TBLM_PRE_INT 32 -#define TBLM_INTXTA 64 -#define TBLM_INSELECT 128 -#define TBLM_PREMODE (TBLM_PRE|TBLM_INTXTA|TBLM_INSELECT|TBLM_PLAIN) -#define TBLM_SPECIAL (TBLM_PRE|TBLM_PRE_INT|TBLM_PLAIN) -#define TBLM_PLAIN (TBLM_PLAINTEXT|TBLM_XMP|TBLM_LST) -#define TBLM_SCRIPT 256 -#define TBLM_STYLE 512 -#define TBLM_IGNORE (TBLM_SCRIPT|TBLM_STYLE) -#define TBLM_ANCHOR 1024 +#define TBLM_PRE	RB_PRE +#define TBLM_SCRIPT	RB_SCRIPT +#define TBLM_STYLE	RB_STYLE +#define TBLM_PLAIN	RB_PLAIN +#define TBLM_NOBR	RB_NOBR +#define TBLM_PRE_INT	RB_PRE_INT +#define TBLM_INTXTA	RB_INTXTA +#define TBLM_INSELECT	RB_INSELECT +#define TBLM_PREMODE	(TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_INTXTA) +#define TBLM_SPECIAL	(TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_NOBR) +#define TBLM_ANCHOR	0x100000  #define  uchar           unsigned char  #define  ushort           unsigned short @@ -139,7 +135,7 @@ struct table_mode {      short nobr_offset;      char nobr_level;      short anchor_offset; -    Str ignore_tag; +    unsigned char end_tag;  };  /* Local Variables:    */ diff --git a/tagtable.tab b/tagtable.tab index 17aaa92..a4c95aa 100644 --- a/tagtable.tab +++ b/tagtable.tab @@ -154,7 +154,6 @@ input_alt	HTML_INPUT_ALT  /input_alt	HTML_N_INPUT_ALT  img_alt		HTML_IMG_ALT  /img_alt	HTML_N_IMG_ALT -eol		HTML_EOL  pre_int		HTML_PRE_INT  /pre_int	HTML_N_PRE_INT  bgsound		HTML_BGSOUND @@ -167,3 +166,5 @@ select_int	HTML_SELECT_INT  option_int	HTML_OPTION_INT  textarea_int	HTML_TEXTAREA_INT  /textarea_int	HTML_N_TEXTAREA_INT +pre_plain	HTML_PRE_PLAIN +/pre_plain	HTML_N_PRE_PLAIN  | 
