about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tatsuya Kinoshita <tats@debian.org> 2020-08-30 00:57:45 +0000
committer GitHub <noreply@github.com> 2020-08-30 00:57:45 +0000
commit 6339dd9f13ec171765a139cf2ec3c7f669235ecd (patch)
tree 7d9de0bb4df6ac6883de4ddb9fc83a371286ddd9
parent Merge pull request #145 from acli/20200824_zh_TW_CLEANED (diff)
parent In HTML5 anchors should not be closed when encountering divs, for example, bu... (diff)
download w3m-6339dd9f13ec171765a139cf2ec3c7f669235ecd.tar.gz
w3m-6339dd9f13ec171765a139cf2ec3c7f669235ecd.zip
Merge pull request #146 from acli/20200821_a_CLEANED
Patch to make w3m’s handling of the <a> element HTML5-compatible (when the stream is HTML5)
-rw-r--r-- etc.c 5
-rw-r--r-- file.c 25
-rw-r--r-- fm.h 3
-rw-r--r-- html.c 6
-rw-r--r-- html.h 1
-rw-r--r-- tests/a1.expected 2
-rw-r--r-- tests/a1.html 1
-rw-r--r-- tests/a2.expected 1
-rw-r--r-- tests/a2.html 3
9 files changed, 41 insertions, 6 deletions
diff --git a/etc.c b/etc.c
index 37c4f15..16d1295 100644
--- a/etc.c
+++ b/etc.c
@@ -727,6 +727,11 @@ next_status(char c, int *status)
case '>':
*status = R_ST_NORMAL;
break;
+ case 'D':
+ case 'd':
+ /* could be a !doctype */
+ *status = R_ST_TAG;
+ break;
default:
*status = R_ST_IRRTAG;
}
diff --git a/file.c b/file.c
index 6011cf2..34e445c 100644
--- a/file.c
+++ b/file.c
@@ -1,4 +1,5 @@
/* $Id: file.c,v 1.266 2012/05/22 09:45:56 inu Exp $ */
+/* vi: set sw=4 ts=8 ai sm noet : */
#include "fm.h"
#include <sys/types.h>
#include "myctype.h"
@@ -4322,9 +4323,18 @@ process_idattr(struct readbuffer *obuf, int cmd, struct parsed_tag *tag)
obuf->flag &= ~RB_P;\
}
-#define CLOSE_A \
- CLOSE_P; \
- close_anchor(h_env, obuf);
+#define HTML5_CLOSE_A do { \
+ if (obuf->flag & RB_HTML5) { \
+ close_anchor(h_env, obuf); \
+ } \
+ } while (0)
+
+#define CLOSE_A do { \
+ CLOSE_P; \
+ if (!(obuf->flag & RB_HTML5)) { \
+ close_anchor(h_env, obuf); \
+ } \
+ } while (0)
#define CLOSE_DT \
if (obuf->flag & RB_IN_DT) { \
@@ -4948,6 +4958,8 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
close_anchor(h_env, obuf);
return 1;
case HTML_IMG:
+ if (parsedtag_exists(tag, ATTR_USEMAP))
+ HTML5_CLOSE_A;
tmp = process_img(tag, h_env->limit);
HTMLlineproc1(tmp->ptr, h_env);
return 1;
@@ -5143,6 +5155,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
HTMLlineproc1(tmp->ptr, h_env);
return 1;
case HTML_BUTTON:
+ HTML5_CLOSE_A;
tmp = process_button(tag);
if (tmp)
HTMLlineproc1(tmp->ptr, h_env);
@@ -5198,6 +5211,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
NULL);
HTMLlineproc1(tmp->ptr, h_env);
return 1;
+ case HTML_DOCTYPE:
+ if (!parsedtag_exists(tag, ATTR_PUBLIC)) {
+ obuf->flag |= RB_HTML5;
+ }
+ return 1;
case HTML_META:
p = q = r = NULL;
parsedtag_get_value(tag, ATTR_HTTP_EQUIV, &p);
@@ -5396,6 +5414,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
}
return 1;
case HTML_EMBED:
+ HTML5_CLOSE_A;
if (view_unseenobject) {
if (parsedtag_get_value(tag, ATTR_SRC, &p)) {
Str s;
diff --git a/fm.h b/fm.h
index 6ce0f19..e536e51 100644
--- a/fm.h
+++ b/fm.h
@@ -662,6 +662,7 @@ struct readbuffer {
#endif /* FORMAT_NICE */
#define RB_DEL 0x100000
#define RB_S 0x200000
+#define RB_HTML5 0x400000
#define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN)
#define RB_SET_ALIGN(obuf,align) {(obuf)->flag &= ~RB_ALIGN; (obuf)->flag |= (align); }
@@ -674,7 +675,7 @@ struct readbuffer {
RB_SET_ALIGN(obuf,(obuf)->flag_stack[--(obuf)->flag_sp]); \
}
-/* status flags */
+/* state of token scanning finite state machine */
#define R_ST_NORMAL 0 /* normal */
#define R_ST_TAG0 1 /* within tag, just after < */
#define R_ST_TAG 2 /* within tag */
diff --git a/html.c b/html.c
index 90cf66e..35a5787 100644
--- a/html.c
+++ b/html.c
@@ -37,6 +37,8 @@ unsigned char ALST_TABLE[] =
ATTR_CELLPADDING, ATTR_VSPACE, ATTR_CORE
};
#define MAXA_TABLE MAXA_CORE + 6
+unsigned char ALST_DOCTYPE[] = { ATTR_PUBLIC }; /* only (html and) public should be checked */
+#define MAXA_DOCTYPE 1
unsigned char ALST_META[] = { ATTR_HTTP_EQUIV, ATTR_CONTENT, ATTR_CHARSET, ATTR_CORE };
#define MAXA_META MAXA_CORE + 3
unsigned char ALST_FRAME[] = { ATTR_SRC, ATTR_NAME, ATTR_CORE };
@@ -221,7 +223,7 @@ TagInfo TagMAP[MAX_HTMLTAG] = {
{"/option", NULL, 0, TFLG_END}, /* 94 HTML_N_OPTION */
{"head", ALST_NOP, MAXA_NOP, 0}, /* 95 HTML_HEAD */
{"/head", NULL, 0, TFLG_END}, /* 96 HTML_N_HEAD */
- {"doctype", ALST_NOP, MAXA_NOP, 0}, /* 97 HTML_DOCTYPE */
+ {"doctype", ALST_DOCTYPE, MAXA_DOCTYPE, 0}, /* 97 HTML_DOCTYPE */
{"noframes", ALST_NOFRAMES, MAXA_NOFRAMES, 0}, /* 98 HTML_NOFRAMES */
{"/noframes", NULL, 0, TFLG_END}, /* 99 HTML_N_NOFRAMES */
@@ -367,7 +369,7 @@ TagAttrInfo AttrMAP[MAX_TAGATTR] = {
{"rev", VTYPE_STR, 0}, /* 48 ATTR_REV */
{"title", VTYPE_STR, 0}, /* 49 ATTR_TITLE */
{"accesskey", VTYPE_STR, 0}, /* 50 ATTR_ACCESSKEY */
- {NULL, VTYPE_NONE, 0}, /* 51 Undefined */
+ {"public", VTYPE_NONE, 0}, /* 51 ATTR_PUBLIC */
{NULL, VTYPE_NONE, 0}, /* 52 Undefined */
{NULL, VTYPE_NONE, 0}, /* 53 Undefined */
{NULL, VTYPE_NONE, 0}, /* 54 Undefined */
diff --git a/html.h b/html.h
index 6da6f5f..7b173c3 100644
--- a/html.h
+++ b/html.h
@@ -318,6 +318,7 @@ typedef struct {
#define ATTR_REV 48
#define ATTR_TITLE 49
#define ATTR_ACCESSKEY 50
+#define ATTR_PUBLIC 51
/* Internal attribute */
#define ATTR_XOFFSET 60
diff --git a/tests/a1.expected b/tests/a1.expected
new file mode 100644
index 0000000..5812232
--- /dev/null
+++ b/tests/a1.expected
@@ -0,0 +1,2 @@
+
+test
diff --git a/tests/a1.html b/tests/a1.html
new file mode 100644
index 0000000..7e89006
--- /dev/null
+++ b/tests/a1.html
@@ -0,0 +1 @@
+<a href="example"><div>test</div></a>
diff --git a/tests/a2.expected b/tests/a2.expected
new file mode 100644
index 0000000..9daeafb
--- /dev/null
+++ b/tests/a2.expected
@@ -0,0 +1 @@
+test
diff --git a/tests/a2.html b/tests/a2.html
new file mode 100644
index 0000000..197b006
--- /dev/null
+++ b/tests/a2.html
@@ -0,0 +1,3 @@
+<!doctype html>
+<meta charset=utf-8>
+<a href="example"><div>test</div></a>