diff options
author | Ambrose Li <ambrose.li@gmail.com> | 2020-08-24 02:20:43 +0000 |
---|---|---|
committer | Ambrose Li <ambrose.li@gmail.com> | 2020-08-24 02:20:43 +0000 |
commit | 9f18e67a9bb5ff0387f76e5a2870b49558f868ad (patch) | |
tree | feb45c9c4e518c0ab784b82b25b6ea85158e0455 | |
parent | Update ChangeLog (diff) | |
download | w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.tar.gz w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.zip |
Cleaned version of 20200823_q branch. Changes the behaviour of the q tag (when m17n and Unicode are configured) to use "smart" quotes if the display charset can handle them. Falls back to the old behaviour (ASCII quotes, with left/right quote semantics for 6/0 and 2/7, i.e. the grave accent and the apostrophe) if the display charset is us-ascii.
Also changes the behaviour of conv_entity() to convert left/right quotes and some dashes to ASCII approximations when the display charset cannot represent them, because named entities are needed for the new code for the q tag.
-rw-r--r-- | entity.c | 14 | ||||
-rw-r--r-- | file.c | 18 | ||||
-rw-r--r-- | fm.h | 1 | ||||
-rw-r--r-- | tests/name_entity_1.expected | 2 | ||||
-rw-r--r-- | tests/name_entity_1.html | 2 | ||||
-rw-r--r-- | tests/name_entity_1.opts | 2 | ||||
-rw-r--r-- | tests/name_entity_2.expected | 1 | ||||
-rw-r--r-- | tests/name_entity_2.html | 1 | ||||
-rw-r--r-- | tests/q1.expected | 1 | ||||
-rw-r--r-- | tests/q1.html | 2 | ||||
-rw-r--r-- | tests/q1.opts | 2 | ||||
-rw-r--r-- | tests/q2.expected | 1 | ||||
-rw-r--r-- | tests/q2.html | 3 | ||||
-rw-r--r-- | tests/q3.expected | 1 | ||||
-rw-r--r-- | tests/q3.html | 3 | ||||
-rw-r--r-- | tests/q3.opts | 4 | ||||
-rw-r--r-- | tests/q4.expected | 1 | ||||
-rw-r--r-- | tests/q4.html | 3 | ||||
-rw-r--r-- | tests/q4.opts | 2 | ||||
-rw-r--r-- | tests/q5.expected | 1 | ||||
-rw-r--r-- | tests/q5.html | 3 | ||||
-rw-r--r-- | tests/q6.expected | 1 | ||||
-rw-r--r-- | tests/q6.html | 3 | ||||
-rw-r--r-- | tests/q6.opts | 1 | ||||
-rw-r--r-- | tests/run_tests | 31 |
25 files changed, 103 insertions, 1 deletion
@@ -58,11 +58,23 @@ conv_entity(unsigned int c) #ifdef USE_M17N #ifdef USE_UNICODE if (c <= WC_C_UCS4_END) { /* Unicode */ + char *chk; wc_uchar utf8[7]; wc_ucs_to_utf8(c, utf8); - return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; + /* we eventually need to display it so check DisplayCharset */ + chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr; + if (strcmp(chk, "?") != 0) + return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; } #endif #endif + if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033) + return "\""; + if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032) + return "'"; + if (c >= 0x2010 && c < 0x2014) + return "-"; + if (c == 0x2014) + return "--"; return "?"; } @@ -4487,9 +4487,27 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) HTMLlineproc1("</b>", h_env); return 1; case HTML_Q: +#ifdef USE_M17N +#ifdef USE_UNICODE + if (DisplayCharset != WC_CES_US_ASCII) { + HTMLlineproc1((obuf->q_level & 1 ? "“": "‘"), h_env); + obuf->q_level += 1; + } + else +#endif +#endif HTMLlineproc1("`", h_env); return 1; case HTML_N_Q: +#ifdef USE_M17N +#ifdef USE_UNICODE + if (DisplayCharset != WC_CES_US_ASCII) { + obuf->q_level -= 1; + HTMLlineproc1((obuf->q_level & 1 ? "”": "’"), h_env); + } + else +#endif +#endif HTMLlineproc1("'", h_env); return 1; case HTML_FIGURE: @@ -610,6 +610,7 @@ struct readbuffer { int flag_sp; int status; unsigned char end_tag; + unsigned char q_level; short table_level; short nobr_level; Anchor anchor; diff --git a/tests/name_entity_1.expected b/tests/name_entity_1.expected new file mode 100644 index 0000000..2a6fd2c --- /dev/null +++ b/tests/name_entity_1.expected @@ -0,0 +1,2 @@ +This is an example sentence that contains some "quoted words" -- +punctuation that would be displayed as question marks but should not. 
diff --git a/tests/name_entity_1.html b/tests/name_entity_1.html new file mode 100644 index 0000000..f2e3633 --- /dev/null +++ b/tests/name_entity_1.html @@ -0,0 +1,2 @@ +This is an example sentence that contains some “quoted words” — +<br>punctuation that would be displayed as question marks but should not. diff --git a/tests/name_entity_1.opts b/tests/name_entity_1.opts new file mode 100644 index 0000000..f9ac4b0 --- /dev/null +++ b/tests/name_entity_1.opts @@ -0,0 +1,2 @@ +-O +us-ascii diff --git a/tests/name_entity_2.expected b/tests/name_entity_2.expected new file mode 100644 index 0000000..b287794 --- /dev/null +++ b/tests/name_entity_2.expected @@ -0,0 +1 @@ +2πr diff --git a/tests/name_entity_2.html b/tests/name_entity_2.html new file mode 100644 index 0000000..9be30f0 --- /dev/null +++ b/tests/name_entity_2.html @@ -0,0 +1 @@ +2πr diff --git a/tests/q1.expected b/tests/q1.expected new file mode 100644 index 0000000..84b67a1 --- /dev/null +++ b/tests/q1.expected @@ -0,0 +1 @@ +`test' diff --git a/tests/q1.html b/tests/q1.html new file mode 100644 index 0000000..292b019 --- /dev/null +++ b/tests/q1.html @@ -0,0 +1,2 @@ +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<q>test</q> diff --git a/tests/q1.opts b/tests/q1.opts new file mode 100644 index 0000000..f9ac4b0 --- /dev/null +++ b/tests/q1.opts @@ -0,0 +1,2 @@ +-O +us-ascii diff --git a/tests/q2.expected b/tests/q2.expected new file mode 100644 index 0000000..bd72179 --- /dev/null +++ b/tests/q2.expected @@ -0,0 +1 @@ +“test” diff --git a/tests/q2.html b/tests/q2.html new file mode 100644 index 0000000..8f1e80b --- /dev/null +++ b/tests/q2.html @@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=utf-8> +<q>test</q> diff --git a/tests/q3.expected b/tests/q3.expected new file mode 100644 index 0000000..c5e67df --- /dev/null +++ b/tests/q3.expected @@ -0,0 +1 @@ +test diff --git a/tests/q3.html b/tests/q3.html new file mode 100644 index 0000000..672a868 --- /dev/null +++ b/tests/q3.html 
@@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=Big5> +<q>test</q> diff --git a/tests/q3.opts b/tests/q3.opts new file mode 100644 index 0000000..32d2b1a --- /dev/null +++ b/tests/q3.opts @@ -0,0 +1,4 @@ +-I +windows-1252 +-O +windows-1252 diff --git a/tests/q4.expected b/tests/q4.expected new file mode 100644 index 0000000..c5e67df --- /dev/null +++ b/tests/q4.expected @@ -0,0 +1 @@ +test diff --git a/tests/q4.html b/tests/q4.html new file mode 100644 index 0000000..672a868 --- /dev/null +++ b/tests/q4.html @@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=Big5> +<q>test</q> diff --git a/tests/q4.opts b/tests/q4.opts new file mode 100644 index 0000000..074570d --- /dev/null +++ b/tests/q4.opts @@ -0,0 +1,2 @@ +-O +windows-1252 diff --git a/tests/q5.expected b/tests/q5.expected new file mode 100644 index 0000000..4f4614d --- /dev/null +++ b/tests/q5.expected @@ -0,0 +1 @@ +“example of a ‘nested’ quote” diff --git a/tests/q5.html b/tests/q5.html new file mode 100644 index 0000000..f4fe761 --- /dev/null +++ b/tests/q5.html @@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=utf-8> +<q>example of a <q>nested</q> quote</q> diff --git a/tests/q6.expected b/tests/q6.expected new file mode 100644 index 0000000..fc98f57 --- /dev/null +++ b/tests/q6.expected @@ -0,0 +1 @@ +"example of a 'nested' quote" diff --git a/tests/q6.html b/tests/q6.html new file mode 100644 index 0000000..e5f1de5 --- /dev/null +++ b/tests/q6.html @@ -0,0 +1,3 @@ +<!doctype html> +<meta charset=big5> +<q>example of a <q>nested</q> quote</q> diff --git a/tests/q6.opts b/tests/q6.opts new file mode 100644 index 0000000..1b59fbf --- /dev/null +++ b/tests/q6.opts @@ -0,0 +1 @@ +-O Big5 diff --git a/tests/run_tests b/tests/run_tests new file mode 100644 index 0000000..0ec3080 --- /dev/null +++ b/tests/run_tests @@ -0,0 +1,31 @@ +total=0 +pass=0 +fail=0 +w3m="../w3m +-config +/dev/null +-o +ignore_null_img_alt=false" +for i in *.html; do + cmd="$w3m +-I +utf-8 +-O +utf-8 +-T +text/html" + opts="`basename "$i" 
.html`.opts" + test -f "$opts" && cmd="$cmd +`grep -v '^#' $opts`" + if (set -x;IFS=' +';$cmd) < "$i" | diff -u - "`basename "$i" .html`.expected"; then + pass="`expr 1 + "$pass"`" + else + fail="`expr 1 + "$fail"`" + fi + total="`expr 1 + "$total"`" +done +echo "TOTAL: $total test(s)" +echo "PASS : $pass" +echo "FAIL : $fail" +test 0 -eq "$fail" |