From 9f18e67a9bb5ff0387f76e5a2870b49558f868ad Mon Sep 17 00:00:00 2001 From: Ambrose Li Date: Sun, 23 Aug 2020 22:20:43 -0400 Subject: Cleaned version of 20200823_q branch. Changes the behaviour of the q tag (when m17n and Unicode are configured) to use "smart" quotes if the display charset can handle them. Falls back to old behaviour (ASCII quotes with left/right quote semantics for 6/0 and 2/7) if display charset is us-ascii. Also changes the behaviour of conv_entity() to convert left/right quotes and some dashes because named entities are needed for the new code for the q tag. --- entity.c | 14 +++++++++++++- file.c | 18 ++++++++++++++++++ fm.h | 1 + tests/name_entity_1.expected | 2 ++ tests/name_entity_1.html | 2 ++ tests/name_entity_1.opts | 2 ++ tests/name_entity_2.expected | 1 + tests/name_entity_2.html | 1 + tests/q1.expected | 1 + tests/q1.html | 2 ++ tests/q1.opts | 2 ++ tests/q2.expected | 1 + tests/q2.html | 3 +++ tests/q3.expected | 1 + tests/q3.html | 3 +++ tests/q3.opts | 4 ++++ tests/q4.expected | 1 + tests/q4.html | 3 +++ tests/q4.opts | 2 ++ tests/q5.expected | 1 + tests/q5.html | 3 +++ tests/q6.expected | 1 + tests/q6.html | 3 +++ tests/q6.opts | 1 + tests/run_tests | 31 +++++++++++++++++++++++++++++++ 25 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 tests/name_entity_1.expected create mode 100644 tests/name_entity_1.html create mode 100644 tests/name_entity_1.opts create mode 100644 tests/name_entity_2.expected create mode 100644 tests/name_entity_2.html create mode 100644 tests/q1.expected create mode 100644 tests/q1.html create mode 100644 tests/q1.opts create mode 100644 tests/q2.expected create mode 100644 tests/q2.html create mode 100644 tests/q3.expected create mode 100644 tests/q3.html create mode 100644 tests/q3.opts create mode 100644 tests/q4.expected create mode 100644 tests/q4.html create mode 100644 tests/q4.opts create mode 100644 tests/q5.expected create mode 100644 tests/q5.html create mode 100644 
tests/q6.expected create mode 100644 tests/q6.html create mode 100644 tests/q6.opts create mode 100644 tests/run_tests diff --git a/entity.c b/entity.c index 45dc95e..67b8cfb 100644 --- a/entity.c +++ b/entity.c @@ -58,11 +58,23 @@ conv_entity(unsigned int c) #ifdef USE_M17N #ifdef USE_UNICODE if (c <= WC_C_UCS4_END) { /* Unicode */ + char *chk; wc_uchar utf8[7]; wc_ucs_to_utf8(c, utf8); - return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; + /* we eventually need to display it so check DisplayCharset */ + chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr; + if (strcmp(chk, "?") != 0) + return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; } #endif #endif + if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033) + return "\""; + if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032) + return "'"; + if (c >= 0x2010 && c < 0x2014) + return "-"; + if (c == 0x2014) + return "--"; return "?"; } diff --git a/file.c b/file.c index c0fc044..cf7a931 100644 --- a/file.c +++ b/file.c @@ -4487,9 +4487,27 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) HTMLlineproc1("", h_env); return 1; case HTML_Q: +#ifdef USE_M17N +#ifdef USE_UNICODE + if (DisplayCharset != WC_CES_US_ASCII) { + HTMLlineproc1((obuf->q_level & 1 ? "“": "‘"), h_env); + obuf->q_level += 1; + } + else +#endif +#endif HTMLlineproc1("`", h_env); return 1; case HTML_N_Q: +#ifdef USE_M17N +#ifdef USE_UNICODE + if (DisplayCharset != WC_CES_US_ASCII) { + obuf->q_level -= 1; + HTMLlineproc1((obuf->q_level & 1 ? 
"”": "’"), h_env); + } + else +#endif +#endif HTMLlineproc1("'", h_env); return 1; case HTML_FIGURE: diff --git a/fm.h b/fm.h index 4a17ecc..6ce0f19 100644 --- a/fm.h +++ b/fm.h @@ -610,6 +610,7 @@ struct readbuffer { int flag_sp; int status; unsigned char end_tag; + unsigned char q_level; short table_level; short nobr_level; Anchor anchor; diff --git a/tests/name_entity_1.expected b/tests/name_entity_1.expected new file mode 100644 index 0000000..2a6fd2c --- /dev/null +++ b/tests/name_entity_1.expected @@ -0,0 +1,2 @@ +This is an example sentence that contains some "quoted words" -- +punctuation that would be displayed as question marks but should not. diff --git a/tests/name_entity_1.html b/tests/name_entity_1.html new file mode 100644 index 0000000..f2e3633 --- /dev/null +++ b/tests/name_entity_1.html @@ -0,0 +1,2 @@ +This is an example sentence that contains some “quoted words” — +
punctuation that would be displayed as question marks but should not. diff --git a/tests/name_entity_1.opts b/tests/name_entity_1.opts new file mode 100644 index 0000000..f9ac4b0 --- /dev/null +++ b/tests/name_entity_1.opts @@ -0,0 +1,2 @@ +-O +us-ascii diff --git a/tests/name_entity_2.expected b/tests/name_entity_2.expected new file mode 100644 index 0000000..b287794 --- /dev/null +++ b/tests/name_entity_2.expected @@ -0,0 +1 @@ +2πr diff --git a/tests/name_entity_2.html b/tests/name_entity_2.html new file mode 100644 index 0000000..9be30f0 --- /dev/null +++ b/tests/name_entity_2.html @@ -0,0 +1 @@ +2πr diff --git a/tests/q1.expected b/tests/q1.expected new file mode 100644 index 0000000..84b67a1 --- /dev/null +++ b/tests/q1.expected @@ -0,0 +1 @@ +`test' diff --git a/tests/q1.html b/tests/q1.html new file mode 100644 index 0000000..292b019 --- /dev/null +++ b/tests/q1.html @@ -0,0 +1,2 @@ + +test diff --git a/tests/q1.opts b/tests/q1.opts new file mode 100644 index 0000000..f9ac4b0 --- /dev/null +++ b/tests/q1.opts @@ -0,0 +1,2 @@ +-O +us-ascii diff --git a/tests/q2.expected b/tests/q2.expected new file mode 100644 index 0000000..bd72179 --- /dev/null +++ b/tests/q2.expected @@ -0,0 +1 @@ +“test” diff --git a/tests/q2.html b/tests/q2.html new file mode 100644 index 0000000..8f1e80b --- /dev/null +++ b/tests/q2.html @@ -0,0 +1,3 @@ + + +test diff --git a/tests/q3.expected b/tests/q3.expected new file mode 100644 index 0000000..c5e67df --- /dev/null +++ b/tests/q3.expected @@ -0,0 +1 @@ +test diff --git a/tests/q3.html b/tests/q3.html new file mode 100644 index 0000000..672a868 --- /dev/null +++ b/tests/q3.html @@ -0,0 +1,3 @@ + + +test diff --git a/tests/q3.opts b/tests/q3.opts new file mode 100644 index 0000000..32d2b1a --- /dev/null +++ b/tests/q3.opts @@ -0,0 +1,4 @@ +-I +windows-1252 +-O +windows-1252 diff --git a/tests/q4.expected b/tests/q4.expected new file mode 100644 index 0000000..c5e67df --- /dev/null +++ b/tests/q4.expected @@ -0,0 +1 @@ +test diff 
--git a/tests/q4.html b/tests/q4.html new file mode 100644 index 0000000..672a868 --- /dev/null +++ b/tests/q4.html @@ -0,0 +1,3 @@ + + +test diff --git a/tests/q4.opts b/tests/q4.opts new file mode 100644 index 0000000..074570d --- /dev/null +++ b/tests/q4.opts @@ -0,0 +1,2 @@ +-O +windows-1252 diff --git a/tests/q5.expected b/tests/q5.expected new file mode 100644 index 0000000..4f4614d --- /dev/null +++ b/tests/q5.expected @@ -0,0 +1 @@ +“example of a ‘nested’ quote” diff --git a/tests/q5.html b/tests/q5.html new file mode 100644 index 0000000..f4fe761 --- /dev/null +++ b/tests/q5.html @@ -0,0 +1,3 @@ + + +example of a nested quote diff --git a/tests/q6.expected b/tests/q6.expected new file mode 100644 index 0000000..fc98f57 --- /dev/null +++ b/tests/q6.expected @@ -0,0 +1 @@ +"example of a 'nested' quote" diff --git a/tests/q6.html b/tests/q6.html new file mode 100644 index 0000000..e5f1de5 --- /dev/null +++ b/tests/q6.html @@ -0,0 +1,3 @@ + + +example of a nested quote diff --git a/tests/q6.opts b/tests/q6.opts new file mode 100644 index 0000000..1b59fbf --- /dev/null +++ b/tests/q6.opts @@ -0,0 +1 @@ +-O Big5 diff --git a/tests/run_tests b/tests/run_tests new file mode 100644 index 0000000..0ec3080 --- /dev/null +++ b/tests/run_tests @@ -0,0 +1,31 @@ +total=0 +pass=0 +fail=0 +w3m="../w3m +-config +/dev/null +-o +ignore_null_img_alt=false" +for i in *.html; do + cmd="$w3m +-I +utf-8 +-O +utf-8 +-T +text/html" + opts="`basename "$i" .html`.opts" + test -f "$opts" && cmd="$cmd +`grep -v '^#' $opts`" + if (set -x;IFS=' +';$cmd) < "$i" | diff -u - "`basename "$i" .html`.expected"; then + pass="`expr 1 + "$pass"`" + else + fail="`expr 1 + "$fail"`" + fi + total="`expr 1 + "$total"`" +done +echo "TOTAL: $total test(s)" +echo "PASS : $pass" +echo "FAIL : $fail" +test 0 -eq "$fail" -- cgit v1.2.3 From b9488ffe60963349bf622a7548e3b9dccc6e0728 Mon Sep 17 00:00:00 2001 From: Ambrose Li Date: Sat, 29 Aug 2020 15:23:36 -0400 Subject: Somehow the wrong quotes were used. 
This should fix the failing tests. --- file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/file.c b/file.c index cf7a931..6011cf2 100644 --- a/file.c +++ b/file.c @@ -4490,7 +4490,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) #ifdef USE_M17N #ifdef USE_UNICODE if (DisplayCharset != WC_CES_US_ASCII) { - HTMLlineproc1((obuf->q_level & 1 ? "“": "‘"), h_env); + HTMLlineproc1((obuf->q_level & 1 ? "‘": "“"), h_env); obuf->q_level += 1; } else @@ -4503,7 +4503,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env) #ifdef USE_UNICODE if (DisplayCharset != WC_CES_US_ASCII) { obuf->q_level -= 1; - HTMLlineproc1((obuf->q_level & 1 ? "”": "’"), h_env); + HTMLlineproc1((obuf->q_level & 1 ? "’": "”"), h_env); } else #endif -- cgit v1.2.3