diff options
author | Ambrose Li <ambrose.li@gmail.com> | 2020-08-25 03:48:09 +0000 |
---|---|---|
committer | Ambrose Li <ambrose.li@gmail.com> | 2020-08-25 03:48:09 +0000 |
commit | 48c9ec565d0a8e147adb61eb79777812688bfbaa (patch) | |
tree | d249686b321e535348c1426eddfa10b610ef5858 | |
parent | Update ChangeLog (diff) | |
download | w3m-48c9ec565d0a8e147adb61eb79777812688bfbaa.tar.gz w3m-48c9ec565d0a8e147adb61eb79777812688bfbaa.zip |
In HTML5, anchors should not be closed when encountering elements such as divs, but should be closed when encountering elements such as buttons. Many sites that use HTML5-style anchors end up having links displayed with zero-length link texts. The proposed patch corrects this behaviour by detecting whether the document is HTML5, then suppressing the close-anchor action in CLOSE_A if it's an HTML5 document. A new macro handles the HTML5-specific cases where anchors are not already always closed.
This also fixes a bug in the tokenizing FSM in etc.c that prevented the !doctype element from being recognized; the fix is necessary because HTML5 detection depends on checking the !doctype element.
-rw-r--r-- | etc.c | 5 | ||||
-rw-r--r-- | file.c | 25 | ||||
-rw-r--r-- | fm.h | 3 | ||||
-rw-r--r-- | html.c | 6 | ||||
-rw-r--r-- | html.h | 1 | ||||
-rw-r--r-- | tests/a1.expected | 2 | ||||
-rw-r--r-- | tests/a1.html | 1 | ||||
-rw-r--r-- | tests/a2.expected | 1 | ||||
-rw-r--r-- | tests/a2.html | 3 | ||||
-rw-r--r-- | tests/run_tests | 31 |
10 files changed, 72 insertions, 6 deletions
@@ -727,6 +727,11 @@ next_status(char c, int *status) case '>': *status = R_ST_NORMAL; break; + case 'D': + case 'd': + /* could be a !doctype */ + *status = R_ST_TAG; + break; default: *status = R_ST_IRRTAG; } @@ -1,4 +1,5 @@ /* $Id: file.c,v 1.266 2012/05/22 09:45:56 inu Exp $ */ +/* vi: set sw=4 ts=8 ai sm noet : */ #include "fm.h" #include <sys/types.h> #include "myctype.h" @@ -4322,9 +4323,18 @@ process_idattr(struct readbuffer *obuf, int cmd, struct parsed_tag *tag) obuf->flag &= ~RB_P;\ } -#define CLOSE_A \ - CLOSE_P; \ - close_anchor(h_env, obuf); +#define HTML5_CLOSE_A do { \ + if (obuf->flag & RB_HTML5) { \ + close_anchor(h_env, obuf); \ + } \ + } while (0) + +#define CLOSE_A do { \ + CLOSE_P; \ + if (!(obuf->flag & RB_HTML5)) { \ + close_anchor(h_env, obuf); \ + } \ + } while (0) #define CLOSE_DT \ if (obuf->flag & RB_IN_DT) { \ @@ -4930,6 +4940,8 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) close_anchor(h_env, obuf); return 1; case HTML_IMG: + if (parsedtag_exists(tag, ATTR_USEMAP)) + HTML5_CLOSE_A; tmp = process_img(tag, h_env->limit); HTMLlineproc1(tmp->ptr, h_env); return 1; @@ -5125,6 +5137,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) HTMLlineproc1(tmp->ptr, h_env); return 1; case HTML_BUTTON: + HTML5_CLOSE_A; tmp = process_button(tag); if (tmp) HTMLlineproc1(tmp->ptr, h_env); @@ -5180,6 +5193,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) NULL); HTMLlineproc1(tmp->ptr, h_env); return 1; + case HTML_DOCTYPE: + if (!parsedtag_exists(tag, ATTR_PUBLIC)) { + obuf->flag |= RB_HTML5; + } + return 1; case HTML_META: p = q = r = NULL; parsedtag_get_value(tag, ATTR_HTTP_EQUIV, &p); @@ -5378,6 +5396,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) } return 1; case HTML_EMBED: + HTML5_CLOSE_A; if (view_unseenobject) { if (parsedtag_get_value(tag, ATTR_SRC, &p)) { Str s; @@ -661,6 +661,7 @@ struct readbuffer { #endif /* FORMAT_NICE */ #define RB_DEL 
0x100000 #define RB_S 0x200000 +#define RB_HTML5 0x400000 #define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN) #define RB_SET_ALIGN(obuf,align) {(obuf)->flag &= ~RB_ALIGN; (obuf)->flag |= (align); } @@ -673,7 +674,7 @@ struct readbuffer { RB_SET_ALIGN(obuf,(obuf)->flag_stack[--(obuf)->flag_sp]); \ } -/* status flags */ +/* state of token scanning finite state machine */ #define R_ST_NORMAL 0 /* normal */ #define R_ST_TAG0 1 /* within tag, just after < */ #define R_ST_TAG 2 /* within tag */ @@ -37,6 +37,8 @@ unsigned char ALST_TABLE[] = ATTR_CELLPADDING, ATTR_VSPACE, ATTR_CORE }; #define MAXA_TABLE MAXA_CORE + 6 +unsigned char ALST_DOCTYPE[] = { ATTR_PUBLIC }; /* only (html and) public should be checked */ +#define MAXA_DOCTYPE 1 unsigned char ALST_META[] = { ATTR_HTTP_EQUIV, ATTR_CONTENT, ATTR_CHARSET, ATTR_CORE }; #define MAXA_META MAXA_CORE + 3 unsigned char ALST_FRAME[] = { ATTR_SRC, ATTR_NAME, ATTR_CORE }; @@ -221,7 +223,7 @@ TagInfo TagMAP[MAX_HTMLTAG] = { {"/option", NULL, 0, TFLG_END}, /* 94 HTML_N_OPTION */ {"head", ALST_NOP, MAXA_NOP, 0}, /* 95 HTML_HEAD */ {"/head", NULL, 0, TFLG_END}, /* 96 HTML_N_HEAD */ - {"doctype", ALST_NOP, MAXA_NOP, 0}, /* 97 HTML_DOCTYPE */ + {"doctype", ALST_DOCTYPE, MAXA_DOCTYPE, 0}, /* 97 HTML_DOCTYPE */ {"noframes", ALST_NOFRAMES, MAXA_NOFRAMES, 0}, /* 98 HTML_NOFRAMES */ {"/noframes", NULL, 0, TFLG_END}, /* 99 HTML_N_NOFRAMES */ @@ -367,7 +369,7 @@ TagAttrInfo AttrMAP[MAX_TAGATTR] = { {"rev", VTYPE_STR, 0}, /* 48 ATTR_REV */ {"title", VTYPE_STR, 0}, /* 49 ATTR_TITLE */ {"accesskey", VTYPE_STR, 0}, /* 50 ATTR_ACCESSKEY */ - {NULL, VTYPE_NONE, 0}, /* 51 Undefined */ + {"public", VTYPE_NONE, 0}, /* 51 ATTR_PUBLIC */ {NULL, VTYPE_NONE, 0}, /* 52 Undefined */ {NULL, VTYPE_NONE, 0}, /* 53 Undefined */ {NULL, VTYPE_NONE, 0}, /* 54 Undefined */ @@ -318,6 +318,7 @@ typedef struct { #define ATTR_REV 48 #define ATTR_TITLE 49 #define ATTR_ACCESSKEY 50 +#define ATTR_PUBLIC 51 /* Internal attribute */ #define ATTR_XOFFSET 60 diff --git 
a/tests/a1.expected b/tests/a1.expected new file mode 100644 index 0000000..5812232 --- /dev/null +++ b/tests/a1.expected @@ -0,0 +1,2 @@ + +test diff --git a/tests/a1.html b/tests/a1.html new file mode 100644 index 0000000..7e89006 --- /dev/null +++ b/tests/a1.html @@ -0,0 +1 @@ +<a href="example"><div>test</div></a> diff --git a/tests/a2.expected b/tests/a2.expected new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/a2.expected @@ -0,0 +1 @@ +test diff --git a/tests/a2.html b/tests/a2.html new file mode 100644 index 0000000..197b006 --- /dev/null +++ b/tests/a2.html @@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=utf-8> +<a href="example"><div>test</div></a> diff --git a/tests/run_tests b/tests/run_tests new file mode 100644 index 0000000..0ec3080 --- /dev/null +++ b/tests/run_tests @@ -0,0 +1,31 @@ +total=0 +pass=0 +fail=0 +w3m="../w3m +-config +/dev/null +-o +ignore_null_img_alt=false" +for i in *.html; do + cmd="$w3m +-I +utf-8 +-O +utf-8 +-T +text/html" + opts="`basename "$i" .html`.opts" + test -f "$opts" && cmd="$cmd +`grep -v '^#' $opts`" + if (set -x;IFS=' +';$cmd) < "$i" | diff -u - "`basename "$i" .html`.expected"; then + pass="`expr 1 + "$pass"`" + else + fail="`expr 1 + "$fail"`" + fi + total="`expr 1 + "$total"`" +done +echo "TOTAL: $total test(s)" +echo "PASS : $pass" +echo "FAIL : $fail" +test 0 -eq "$fail" |